diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-04-06 13:18:13 +0200 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-04-06 13:18:13 +0200 |
commit | 1bbf2f7c9cd7c664e10a10efc1b2f8bcab5bdf02 (patch) | |
tree | acf6f4eefc2c026a4c33edc7bc75482afac22de3 /storage | |
parent | 84c13fdf922e0a4c46d3bab3d142b509fd4d105e (diff) | |
parent | 86ca1bc3b51d476c677b063525c43649c73a88af (diff) | |
download | mariadb-git-1bbf2f7c9cd7c664e10a10efc1b2f8bcab5bdf02.tar.gz |
Merge perch.ndb.mysql.com:/home/jonas/src/50-work
into perch.ndb.mysql.com:/home/jonas/src/51-jonas
storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp:
Auto merged
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
Auto merged
storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp:
Auto merged
storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp:
Auto merged
storage/ndb/include/kernel/signaldata/FailRep.hpp:
Auto merged
storage/ndb/include/kernel/signaldata/StopReq.hpp:
Auto merged
storage/ndb/include/kernel/signaldata/WaitGCP.hpp:
Auto merged
storage/ndb/src/common/debugger/EventLogger.cpp:
Auto merged
storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp:
Auto merged
storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp:
Auto merged
storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp:
Auto merged
storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp:
Auto merged
storage/ndb/src/kernel/vm/Configuration.cpp:
Auto merged
storage/ndb/test/ndbapi/testNodeRestart.cpp:
Auto merged
storage/ndb/test/src/NdbRestarts.cpp:
Auto merged
storage/ndb/include/mgmapi/ndb_logevent.h:
merge
storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp:
merge
storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp:
merge
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
merge
storage/ndb/test/run-test/daily-basic-tests.txt:
merge
Diffstat (limited to 'storage')
20 files changed, 1716 insertions, 234 deletions
diff --git a/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp b/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp index f33c991249f..ab51ed17bc3 100644 --- a/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp +++ b/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp @@ -30,12 +30,17 @@ class CmRegReq { friend class Qmgr; public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size ); private: Uint32 blockRef; Uint32 nodeId; - Uint32 version; // See ndb_version.h + Uint32 version; // See ndb_version.h + + Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType + Uint32 latest_gci; // 0 means no fs + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; /** @@ -59,8 +64,7 @@ private: * The dynamic id that the node reciving this signal has */ Uint32 dynamicId; - - Uint32 allNdbNodes[NdbNodeBitmask::Size]; + Uint32 allNdbNodes[NdbNodeBitmask::Size]; }; /** @@ -73,7 +77,7 @@ class CmRegRef { friend class Qmgr; public: - STATIC_CONST( SignalLength = 4 ); + STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size ); enum ErrorCode { ZBUSY = 0, /* Only the president can send this */ @@ -85,14 +89,27 @@ public: * as president. */ ZNOT_PRESIDENT = 5, /* We are not president */ ZNOT_DEAD = 6, /* We are not dead when we are starting */ - ZINCOMPATIBLE_VERSION = 7 + ZINCOMPATIBLE_VERSION = 7, + ZINCOMPATIBLE_START_TYPE = 8 }; private: Uint32 blockRef; Uint32 nodeId; Uint32 errorCode; + /** + * Applicable if ZELECTION + */ Uint32 presidentCandidate; + Uint32 candidate_latest_gci; // 0 means non + + /** + * Data for sending node sending node + */ + Uint32 latest_gci; + Uint32 start_type; + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; class CmAdd { diff --git a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp index ef165b6c16f..3e3d926a999 100644 --- a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp +++ b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp @@ -64,6 +64,7 @@ public: // 19 NDBFS Fipple with O_SYNC, O_CREATE etc. // 20-24 BACKUP NdbcntrTestStopOnError = 25, + NdbcntrStopNodes = 70, // 100-105 TUP and ACC // 200-240 UTIL // 300-305 TRIX diff --git a/storage/ndb/include/kernel/signaldata/FailRep.hpp b/storage/ndb/include/kernel/signaldata/FailRep.hpp index 44577f07fdc..f2250f1af73 100644 --- a/storage/ndb/include/kernel/signaldata/FailRep.hpp +++ b/storage/ndb/include/kernel/signaldata/FailRep.hpp @@ -18,6 +18,7 @@ #define FAIL_REP_HPP #include "SignalData.hpp" +#include <NodeBitmask.hpp> /** * @@ -27,6 +28,7 @@ class FailRep { * Sender(s) & Reciver(s) */ friend class Qmgr; + friend class Ndbcntr; /** * For printing @@ -35,7 +37,8 @@ class FailRep { public: STATIC_CONST( SignalLength = 2 ); - + STATIC_CONST( ExtraLength = 1 + NdbNodeBitmask::Size ); + enum FailCause { ZOWN_FAILURE=0, ZOTHER_NODE_WHEN_WE_START=1, @@ -43,13 +46,20 @@ public: ZSTART_IN_REGREQ=3, ZHEARTBEAT_FAILURE=4, ZLINK_FAILURE=5, - ZOTHERNODE_FAILED_DURING_START=6 + ZOTHERNODE_FAILED_DURING_START=6, + ZMULTI_NODE_SHUTDOWN = 7, + ZPARTITIONED_CLUSTER = 8 }; - + private: Uint32 failNodeId; Uint32 failCause; + /** + * Used when failCause == ZPARTITIONED_CLUSTER + */ + Uint32 president; + Uint32 partition[NdbNodeBitmask::Size]; }; diff --git a/storage/ndb/include/kernel/signaldata/StopReq.hpp b/storage/ndb/include/kernel/signaldata/StopReq.hpp index 8e6a0b90a91..8a9fde75b6c 100644 --- a/storage/ndb/include/kernel/signaldata/StopReq.hpp +++ b/storage/ndb/include/kernel/signaldata/StopReq.hpp @@ -32,7 +32,7 @@ class StopReq friend class MgmtSrvr; public: - STATIC_CONST( SignalLength = 9 ); + STATIC_CONST( SignalLength = 9 + NdbNodeBitmask::Size); public: Uint32 senderRef; @@ -49,29 +49,34 @@ public: Int32 readOperationTimeout; // Timeout before read operations are aborted Int32 operationTimeout; // Timeout before all operations are aborted + Uint32 nodes[NdbNodeBitmask::Size]; + static void setSystemStop(Uint32 & requestInfo, bool value); static void setPerformRestart(Uint32 & requestInfo, bool value); static void setNoStart(Uint32 & requestInfo, bool value); static void setInitialStart(Uint32 & requestInfo, bool value); - static void setEscalateOnNodeFail(Uint32 & requestInfo, bool value); /** * Don't perform "graceful" shutdown/restart... */ static void setStopAbort(Uint32 & requestInfo, bool value); + static void setStopNodes(Uint32 & requestInfo, bool value); static bool getSystemStop(const Uint32 & requestInfo); static bool getPerformRestart(const Uint32 & requestInfo); static bool getNoStart(const Uint32 & requestInfo); static bool getInitialStart(const Uint32 & requestInfo); - static bool getEscalateOnNodeFail(const Uint32 & requestInfo); static bool getStopAbort(const Uint32 & requestInfo); + static bool getStopNodes(const Uint32 & requestInfo); }; struct StopConf { STATIC_CONST( SignalLength = 2 ); Uint32 senderData; - Uint32 nodeState; + union { + Uint32 nodeState; + Uint32 nodeId; + }; }; class StopRef @@ -94,7 +99,9 @@ public: NodeShutdownInProgress = 1, SystemShutdownInProgress = 2, NodeShutdownWouldCauseSystemCrash = 3, - TransactionAbortFailed = 4 + TransactionAbortFailed = 4, + UnsupportedNodeShutdown = 5, + MultiNodeShutdownNotMaster = 6 }; public: @@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo) inline bool -StopReq::getEscalateOnNodeFail(const Uint32 & requestInfo) +StopReq::getStopAbort(const Uint32 & requestInfo) { - return requestInfo & 16; + return requestInfo & 32; } inline bool -StopReq::getStopAbort(const Uint32 & requestInfo) +StopReq::getStopNodes(const Uint32 & requestInfo) { - return requestInfo & 32; + return requestInfo & 64; } @@ -187,24 +194,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value) inline void -StopReq::setEscalateOnNodeFail(Uint32 & requestInfo, bool value) +StopReq::setStopAbort(Uint32 & requestInfo, bool value) { if(value) - requestInfo |= 16; + requestInfo |= 32; else - requestInfo &= ~16; + requestInfo &= ~32; } inline void -StopReq::setStopAbort(Uint32 & requestInfo, bool value) +StopReq::setStopNodes(Uint32 & requestInfo, bool value) { if(value) - requestInfo |= 32; + requestInfo |= 64; else - requestInfo &= ~32; + requestInfo &= ~64; } - #endif diff --git a/storage/ndb/include/kernel/signaldata/WaitGCP.hpp b/storage/ndb/include/kernel/signaldata/WaitGCP.hpp index ebed28714d2..be2a5b9d5f0 100644 --- a/storage/ndb/include/kernel/signaldata/WaitGCP.hpp +++ b/storage/ndb/include/kernel/signaldata/WaitGCP.hpp @@ -46,7 +46,9 @@ public: Complete = 1, ///< Wait for a GCP to complete CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed CompleteIfRunning = 3, ///< Wait for ongoing GCP - CurrentGCI = 8 ///< Immediately return current GCI + CurrentGCI = 8, ///< Immediately return current GCI + BlockStartGcp = 9, + UnblockStartGcp = 10 }; Uint32 senderRef; @@ -70,11 +72,12 @@ class WaitGCPConf { //friend class Grep::PSCoord; public: - STATIC_CONST( SignalLength = 2 ); + STATIC_CONST( SignalLength = 3 ); public: Uint32 senderData; Uint32 gcp; + Uint32 blockStatus; }; class WaitGCPRef { diff --git a/storage/ndb/include/mgmapi/ndb_logevent.h b/storage/ndb/include/mgmapi/ndb_logevent.h index df4c228ec8a..73dd192e305 100644 --- a/storage/ndb/include/mgmapi/ndb_logevent.h +++ b/storage/ndb/include/mgmapi/ndb_logevent.h @@ -169,9 +169,13 @@ extern "C" { NDB_LE_BackupAborted = 57, /** NDB_MGM_EVENT_CATEGORY_INFO */ - NDB_LE_EventBufferStatus = 58 + NDB_LE_EventBufferStatus = 58, /* 59 used */ + + /** NDB_MGM_EVENT_CATEGORY_STARTUP */ + NDB_LE_StartReport = 60 + /* 60 unused */ /* 61 unused */ /* 62 unused */ @@ -637,6 +641,13 @@ extern "C" { unsigned type; unsigned node_id; } SingleUser; + /** Log even data @ref NDB_LE_StartReport */ + struct { + unsigned report_type; + unsigned remaining_time; + unsigned bitmask_size; + unsigned bitmask_data[1]; + } StartReport; #ifndef DOXYGEN_FIX }; #else diff --git a/storage/ndb/src/common/debugger/EventLogger.cpp b/storage/ndb/src/common/debugger/EventLogger.cpp index cfa06e96085..db9b8417c0d 100644 --- a/storage/ndb/src/common/debugger/EventLogger.cpp +++ b/storage/ndb/src/common/debugger/EventLogger.cpp @@ -743,6 +743,90 @@ void getTextSingleUser(QQQQ) { } } +void getTextStartReport(QQQQ) { + Uint32 time = theData[2]; + Uint32 sz = theData[3]; + char mask1[100]; + char mask2[100]; + char mask3[100]; + char mask4[100]; + BitmaskImpl::getText(sz, theData + 4 + (0 * sz), mask1); + BitmaskImpl::getText(sz, theData + 4 + (1 * sz), mask2); + BitmaskImpl::getText(sz, theData + 4 + (2 * sz), mask3); + BitmaskImpl::getText(sz, theData + 4 + (3 * sz), mask4); + switch(theData[1]){ + case 1: // Wait initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start, waiting for %s to connect, " + " nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 2: // Wait partial + BaseString::snprintf + (m_text, m_text_len, + "Waiting until nodes: %s connects, " + "nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 3: // Wait partial timeout + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for nodes %s to connect, " + "nodes [ all: %s connected: %s no-wait: %s ]", + + time, mask4, mask1, mask2, mask3); + break; + case 4: // Wait partioned + BaseString::snprintf + (m_text, m_text_len, + "Waiting for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + mask1, mask2, mask4, mask3); + break; + case 5: + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + time, mask1, mask2, mask4, mask3); + break; + case 0x8000: // Do initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8001: // Do start + BaseString::snprintf + (m_text, m_text_len, + "Start with all nodes %s", + mask2); + break; + case 0x8002: // Do partial + BaseString::snprintf + (m_text, m_text_len, + "Start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8003: // Do partioned + BaseString::snprintf + (m_text, m_text_len, + "Start potentially partitioned with nodes %s " + " [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + default: + BaseString::snprintf + (m_text, m_text_len, + "Unknown startreport: 0x%x [ %s %s %s %s ]", + theData[1], + mask1, mask2, mask3, mask4); + } +} + #if 0 BaseString::snprintf(m_text, m_text_len, @@ -791,6 +875,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = { ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ), + ROW(StartReport, LogLevel::llStartUp, 4, Logger::LL_INFO ), // NODERESTART ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ), diff --git a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index 6d3df7b2047..c3c35ac4ab2 100644 --- a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -134,6 +134,9 @@ Cmvmi::~Cmvmi() m_shared_page_pool.clear(); } +#ifdef ERROR_INSERT +NodeBitmask c_error_9000_nodes_mask; +#endif void Cmvmi::execNDB_TAMPER(Signal* signal) { @@ -441,21 +444,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) const Uint32 len = signal->getLength(); if(len == 2){ - globalTransporterRegistry.do_connect(tStartingNode); - globalTransporterRegistry.setIOState(tStartingNode, HaltIO); - //----------------------------------------------------- - // Report that the connection to the node is opened - //----------------------------------------------------- - signal->theData[0] = NDB_LE_CommunicationOpened; - signal->theData[1] = tStartingNode; - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); - //----------------------------------------------------- +#ifdef ERROR_INSERT + if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode))) +#endif + { + globalTransporterRegistry.do_connect(tStartingNode); + globalTransporterRegistry.setIOState(tStartingNode, HaltIO); + + //----------------------------------------------------- + // Report that the connection to the node is opened + //----------------------------------------------------- + signal->theData[0] = NDB_LE_CommunicationOpened; + signal->theData[1] = tStartingNode; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); + //----------------------------------------------------- + } } else { for(unsigned int i = 1; i < MAX_NODES; i++ ) { jam(); if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){ jam(); + +#ifdef ERROR_INSERT + if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i)) + continue; +#endif + globalTransporterRegistry.do_connect(i); globalTransporterRegistry.setIOState(i, HaltIO); @@ -1064,7 +1079,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; - if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){ + Uint32 arg = dumpState->args[0]; + if (arg == DumpStateOrd::CmvmiDumpConnections){ for(unsigned int i = 1; i < MAX_NODES; i++ ){ const char* nodeTypeStr = ""; switch(getNodeInfo(i).m_type){ @@ -1094,7 +1110,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } - if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){ + if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", g_sectionSegmentPool.getSize(), g_sectionSegmentPool.getNoOfFree()); @@ -1131,7 +1147,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) return; } - if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert) + if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert) { if(signal->getLength() == 1) { @@ -1151,7 +1167,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } - if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) { + if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) { unsigned i; Uint32 loopCount = dumpState->args[1]; const unsigned len0 = 11; @@ -1179,6 +1195,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2); } +#ifdef ERROR_INSERT + if (arg == 9000) + { + SET_ERROR_INSERT_VALUE(9000); + for (Uint32 i = 1; i<signal->getLength(); i++) + c_error_9000_nodes_mask.set(signal->theData[i]); + } + + if (arg == 9001) + { + CLEAR_ERROR_INSERT_VALUE; + for (Uint32 i = 0; i<MAX_NODES; i++) + { + if (c_error_9000_nodes_mask.get(i)) + { + signal->theData[0] = 0; + signal->theData[1] = i; + EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2); + } + } + c_error_9000_nodes_mask.clear(); + } +#endif + #ifdef VM_TRACE #if 0 { diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index aaf27b351be..a6ec3749606 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -58,6 +58,7 @@ void Dbdih::initData() cwaitLcpSr = false; c_blockCommit = false; c_blockCommitNo = 1; + cntrlblockref = RNIL; }//Dbdih::initData() void Dbdih::initRecords() diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 95c691f0c6b..9936217bbf3 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -11995,7 +11995,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Uint32 tmngNode; Uint32 tmngNodeGroup; Uint32 tmngLimit; - Uint32 i; + Uint32 i, j; /**----------------------------------------------------------------------- * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED @@ -12041,6 +12041,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup); }//if }//for + + for (i = 0; i<cnoOfNodeGroups; i++) + { + jam(); + bool alive = false; + NodeGroupRecordPtr NGPtr; + NGPtr.i = i; + ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord); + for (j = 0; j<NGPtr.p->nodeCount; j++) + { + jam(); + mngNodeptr.i = NGPtr.p->nodesInGroup[j]; + ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord); + if (checkNodeAlive(NGPtr.p->nodesInGroup[j])) + { + alive = true; + break; + } + } + + if (!alive) + { + char buf[255]; + BaseString::snprintf + (buf, sizeof(buf), + "Illegal initial start, no alive node in nodegroup %u", i); + progError(__LINE__, + NDBD_EXIT_SR_RESTARTCONFLICT, + buf); + + } + } }//Dbdih::makeNodeGroups() /** @@ -12851,7 +12883,6 @@ void Dbdih::sendStartFragreq(Signal* signal, void Dbdih::setInitialActiveStatus() { NodeRecordPtr siaNodeptr; - Uint32 tsiaNodeActiveStatus; Uint32 tsiaNoActiveNodes; tsiaNoActiveNodes = csystemnodes - cnoHotSpare; @@ -12859,39 +12890,34 @@ void Dbdih::setInitialActiveStatus() SYSFILE->nodeStatus[i] = 0; for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) { ptrAss(siaNodeptr, nodeRecord); - if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) { + switch(siaNodeptr.p->nodeStatus){ + case NodeRecord::ALIVE: + case NodeRecord::DEAD: if (tsiaNoActiveNodes == 0) { jam(); siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare; } else { jam(); tsiaNoActiveNodes = tsiaNoActiveNodes - 1; - siaNodeptr.p->activeStatus = Sysfile::NS_Active; - }//if - } else { - jam(); - siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; - }//if - switch (siaNodeptr.p->activeStatus) { - case Sysfile::NS_Active: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_Active; - break; - case Sysfile::NS_HotSpare: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_HotSpare; - break; - case Sysfile::NS_NotDefined: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_NotDefined; + if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) + { + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_Active; + } + else + { + siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; + } + } break; default: - ndbrequire(false); - return; + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; break; - }//switch - Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus, - tsiaNodeActiveStatus); + }//if + Sysfile::setNodeStatus(siaNodeptr.i, + SYSFILE->nodeStatus, + siaNodeptr.p->activeStatus); }//for }//Dbdih::setInitialActiveStatus() @@ -14613,11 +14639,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) jam(); conf->senderData = senderData; conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); return; }//if + if (requestType == WaitGCPReq::BlockStartGcp) + { + jam(); + conf->senderData = senderData; + conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; + sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, + WaitGCPConf::SignalLength, JBB); + cgcpOrderBlocked = 1; + return; + } + + if (requestType == WaitGCPReq::UnblockStartGcp) + { + jam(); + conf->senderData = senderData; + conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; + sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, + WaitGCPConf::SignalLength, JBB); + cgcpOrderBlocked = 0; + return; + } + if(isMaster()) { /** * Master @@ -14629,6 +14680,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) jam(); conf->senderData = senderData; conf->gcp = coldgcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); return; @@ -14715,6 +14767,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal) conf->senderData = ptr.p->clientData; conf->gcp = gcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); @@ -14782,6 +14835,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal) c_waitGCPMasterList.next(ptr); conf->senderData = clientData; + conf->blockStatus = cgcpOrderBlocked; sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp b/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp index ffebce36757..da6aeaf8e8f 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp @@ -204,6 +204,7 @@ private: void execWAIT_GCP_CONF(Signal* signal); void execSTOP_REQ(Signal* signal); + void execSTOP_CONF(Signal* signal); void execRESUME_REQ(Signal* signal); void execCHANGE_NODE_STATE_CONF(Signal* signal); @@ -339,6 +340,16 @@ public: void progError(int line, int cause, const char * extra) { cntr.progError(line, cause, extra); } + + enum StopNodesStep { + SR_BLOCK_GCP_START_GCP = 0, + SR_WAIT_COMPLETE_GCP = 1, + SR_UNBLOCK_GCP_START_GCP = 2, + SR_QMGR_STOP_REQ = 3, + SR_WAIT_NODE_FAILURES = 4, + SR_CLUSTER_SHUTDOWN = 12 + } m_state; + SignalCounter m_stop_req_counter; }; private: StopRecord c_stopRec; diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp index d2e2f1d2335..ed37622657d 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp @@ -88,6 +88,7 @@ Ndbcntr::Ndbcntr(Block_context& ctx): addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF); addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ); + addRecSignal(GSN_STOP_CONF, &Ndbcntr::execSTOP_CONF); addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ); addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF); diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index eced400d480..47649f3899b 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -42,6 +42,8 @@ #include <signaldata/FsRemoveReq.hpp> #include <signaldata/ReadConfig.hpp> +#include <signaldata/FailRep.hpp> + #include <AttributeHeader.hpp> #include <Configuration.hpp> #include <DebuggerNames.hpp> @@ -853,17 +855,9 @@ Ndbcntr::trySystemRestart(Signal* signal){ return false; } - if(!allNodes && c_start.m_startPartialTimeout > now){ - jam(); - return false; - } - NodeState::StartType srType = NodeState::ST_SYSTEM_RESTART; - if(c_start.m_waiting.equal(c_start.m_withoutLog)){ - if(!allNodes){ - jam(); - return false; - } + if(c_start.m_waiting.equal(c_start.m_withoutLog)) + { jam(); srType = NodeState::ST_INITIAL_START; c_start.m_starting = c_start.m_withoutLog; // Used for starting... @@ -893,10 +887,6 @@ Ndbcntr::trySystemRestart(Signal* signal){ ndbrequire(false); // All nodes -> partitioning, which is not allowed } - if(c_start.m_startPartitionedTimeout > now){ - jam(); - return false; - } break; } @@ -1512,13 +1502,74 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) sendSignal(QMGR_REF, GSN_NODE_FAILREP, signal, NodeFailRep::SignalLength, JBB); + if (c_stopRec.stopReq.senderRef) + { + jam(); + switch(c_stopRec.m_state){ + case StopRecord::SR_WAIT_NODE_FAILURES: + { + jam(); + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + tmp.bitANDC(allFailed); + tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + + if (tmp.isclear()) + { + jam(); + if (c_stopRec.stopReq.senderRef != RNIL) + { + jam(); + StopConf * const stopConf = (StopConf *)&signal->theData[0]; + stopConf->senderData = c_stopRec.stopReq.senderData; + stopConf->nodeState = (Uint32) NodeState::SL_SINGLEUSER; + sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_CONF, signal, + StopConf::SignalLength, JBB); + } + + c_stopRec.stopReq.senderRef = 0; + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::UnblockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBA); + } + break; + } + case StopRecord::SR_QMGR_STOP_REQ: + { + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + tmp.bitANDC(allFailed); + + if (tmp.isclear()) + { + Uint32 nodeId = allFailed.find(0); + tmp.set(nodeId); + + StopConf* conf = (StopConf*)signal->getDataPtrSend(); + conf->senderData = c_stopRec.stopReq.senderData; + conf->nodeId = nodeId; + sendSignal(reference(), + GSN_STOP_CONF, signal, StopConf::SignalLength, JBB); + } + + tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + + break; + } + } + } + + signal->theData[0] = NDB_LE_NODE_FAILREP; + signal->theData[2] = 0; + Uint32 nodeId = 0; while(!allFailed.isclear()){ nodeId = allFailed.find(nodeId + 1); allFailed.clear(nodeId); - signal->theData[0] = NDB_LE_NODE_FAILREP; signal->theData[1] = nodeId; - signal->theData[2] = 0; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); }//for @@ -1964,13 +2015,15 @@ void Ndbcntr::execDUMP_STATE_ORD(Signal* signal) { DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; - if(signal->theData[0] == 13){ + Uint32 arg = dumpState->args[0]; + + if(arg == 13){ infoEvent("Cntr: cstartPhase = %d, cinternalStartphase = %d, block = %d", cstartPhase, cinternalStartphase, cndbBlocksCount); infoEvent("Cntr: cmasterNodeId = %d", cmasterNodeId); } - if (dumpState->args[0] == DumpStateOrd::NdbcntrTestStopOnError){ + if (arg == DumpStateOrd::NdbcntrTestStopOnError){ if (m_ctx.m_config.stopOnError() == true) ((Configuration&)m_ctx.m_config).stopOnError(false); @@ -1983,6 +2036,28 @@ Ndbcntr::execDUMP_STATE_ORD(Signal* signal) SystemError::SignalLength, JBA); } + if (arg == DumpStateOrd::NdbcntrStopNodes) + { + NdbNodeBitmask mask; + for(Uint32 i = 1; i<signal->getLength(); i++) + mask.set(signal->theData[i]); + + StopReq* req = (StopReq*)signal->getDataPtrSend(); + req->senderRef = RNIL; + req->senderData = 123; + req->requestInfo = 0; + req->singleuser = 0; + req->singleUserApi = 0; + mask.copyto(NdbNodeBitmask::Size, req->nodes); + StopReq::setPerformRestart(req->requestInfo, 1); + StopReq::setNoStart(req->requestInfo, 1); + StopReq::setStopNodes(req->requestInfo, 1); + StopReq::setStopAbort(req->requestInfo, 1); + + sendSignal(reference(), GSN_STOP_REQ, signal, + StopReq::SignalLength, JBB); + return; + } }//Ndbcntr::execDUMP_STATE_ORD() @@ -2043,9 +2118,12 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ Uint32 senderData = req->senderData; BlockReference senderRef = req->senderRef; bool abort = StopReq::getStopAbort(req->requestInfo); + bool stopnodes = StopReq::getStopNodes(req->requestInfo); - if(getNodeState().startLevel < NodeState::SL_STARTED || - abort && !singleuser){ + if(!singleuser && + (getNodeState().startLevel < NodeState::SL_STARTED || + (abort && !stopnodes))) + { /** * Node is not started yet * @@ -2087,21 +2165,71 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ else ref->errorCode = StopRef::NodeShutdownInProgress; ref->senderData = senderData; - sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + return; + } + + if (stopnodes && !abort) + { + jam(); + ref->errorCode = StopRef::UnsupportedNodeShutdown; + ref->senderData = senderData; + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + return; + } + + if (stopnodes && cmasterNodeId != getOwnNodeId()) + { + jam(); + ref->errorCode = StopRef::MultiNodeShutdownNotMaster; + ref->senderData = senderData; + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; } c_stopRec.stopReq = * req; c_stopRec.stopInitiatedTime = NdbTick_CurrentMillisecond(); - if(!singleuser) { - if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) { + if (stopnodes) + { + jam(); + + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + return; + } + + char buf[100]; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + infoEvent("Initiating shutdown abort of %s", mask.getText(buf)); + ndbout_c("Initiating shutdown abort of %s", mask.getText(buf)); + + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_BLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::BlockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + return; + } + else if(!singleuser) + { + if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) + { jam(); - if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ + if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)) + { ((Configuration&)m_ctx.m_config).stopOnError(false); } } - if(!c_stopRec.checkNodeFail(signal)){ + if(!c_stopRec.checkNodeFail(signal)) + { jam(); return; } @@ -2171,7 +2299,17 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ */ NodeBitmask ndbMask; ndbMask.assign(cntr.c_startedNodes); - ndbMask.clear(cntr.getOwnNodeId()); + + if (StopReq::getStopNodes(stopReq.requestInfo)) + { + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, stopReq.nodes); + ndbMask.bitANDC(tmp); + } + else + { + ndbMask.clear(cntr.getOwnNodeId()); + } CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; sd->blockRef = cntr.reference(); @@ -2193,7 +2331,8 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash; const BlockReference bref = stopReq.senderRef; - cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + if (bref != RNIL) + cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); stopReq.senderRef = 0; @@ -2243,23 +2382,23 @@ Ndbcntr::StopRecord::checkTcTimeout(Signal* signal){ if(stopReq.getSystemStop(stopReq.requestInfo) || stopReq.singleuser){ jam(); if(stopReq.singleuser) - { - jam(); - AbortAllReq * req = (AbortAllReq*)&signal->theData[0]; - req->senderRef = cntr.reference(); - req->senderData = 12; - cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal, - AbortAllReq::SignalLength, JBB); - } + { + jam(); + AbortAllReq * req = (AbortAllReq*)&signal->theData[0]; + req->senderRef = cntr.reference(); + req->senderData = 12; + cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal, + AbortAllReq::SignalLength, JBB); + } else - { - WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; - req->senderRef = cntr.reference(); - req->senderData = 12; - req->requestType = WaitGCPReq::CompleteForceStart; - cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, - WaitGCPReq::SignalLength, JBB); - } + { + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = cntr.reference(); + req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN; + req->requestType = WaitGCPReq::CompleteForceStart; + cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + } } else { jam(); StopPermReq * req = (StopPermReq*)&signal->theData[0]; @@ -2421,7 +2560,7 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){ WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; req->senderRef = reference(); - req->senderData = 12; + req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN; req->requestType = WaitGCPReq::CompleteForceStart; sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, WaitGCPReq::SignalLength, JBB); @@ -2430,29 +2569,129 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){ void Ndbcntr::execWAIT_GCP_CONF(Signal* signal){ jamEntry(); - ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)); - NodeState newState(NodeState::SL_STOPPING_3, true); + WaitGCPConf* conf = (WaitGCPConf*)signal->getDataPtr(); - /** - * Inform QMGR so that arbitrator won't kill us - */ - NodeStateRep * rep = (NodeStateRep *)&signal->theData[0]; - rep->nodeState = newState; - rep->nodeState.masterNodeId = cmasterNodeId; - rep->nodeState.setNodeGroup(c_nodeGroup); - EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, NodeStateRep::SignalLength); - - if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ - jam(); - StartOrd * startOrd = (StartOrd *)&signal->theData[0]; - startOrd->restartInfo = c_stopRec.stopReq.requestInfo; - sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500, - StartOrd::SignalLength); - } else { + switch(conf->senderData){ + case StopRecord::SR_BLOCK_GCP_START_GCP: + { + jam(); + /** + * + */ + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + goto unblock; + } + + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_WAIT_COMPLETE_GCP; + req->requestType = WaitGCPReq::CompleteIfRunning; + + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + return; + } + case StopRecord::SR_UNBLOCK_GCP_START_GCP: + { + jam(); + return; + } + case StopRecord::SR_WAIT_COMPLETE_GCP: + { jam(); - sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1); + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + goto unblock; + } + + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + c_stopRec.m_stop_req_counter = tmp; + NodeReceiverGroup rg(QMGR, tmp); + StopReq * stopReq = (StopReq *)&signal->theData[0]; + * stopReq = c_stopRec.stopReq; + stopReq->senderRef = reference(); + sendSignal(rg, GSN_STOP_REQ, signal, StopReq::SignalLength, JBA); + c_stopRec.m_state = StopRecord::SR_QMGR_STOP_REQ; + return; + } + case StopRecord::SR_CLUSTER_SHUTDOWN: + { + jam(); + break; + } + } + + { + ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)); + NodeState newState(NodeState::SL_STOPPING_3, true); + + /** + * Inform QMGR so that arbitrator won't kill us + */ + NodeStateRep * rep = (NodeStateRep *)&signal->theData[0]; + rep->nodeState = newState; + rep->nodeState.masterNodeId = cmasterNodeId; + rep->nodeState.setNodeGroup(c_nodeGroup); + EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, + NodeStateRep::SignalLength); + + if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ + jam(); + StartOrd * startOrd = (StartOrd *)&signal->theData[0]; + startOrd->restartInfo = c_stopRec.stopReq.requestInfo; + sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500, + StartOrd::SignalLength); + } else { + jam(); + sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1); + } + return; + } + +unblock: + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::UnblockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); +} + +void +Ndbcntr::execSTOP_CONF(Signal* signal) +{ + jamEntry(); + StopConf *conf = (StopConf*)signal->getDataPtr(); + ndbrequire(c_stopRec.m_state == StopRecord::SR_QMGR_STOP_REQ); + c_stopRec.m_stop_req_counter.clearWaitingFor(conf->nodeId); + if (c_stopRec.m_stop_req_counter.done()) + { + char buf[100]; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + infoEvent("Stopping of %s", mask.getText(buf)); + ndbout_c("Stopping of %s", mask.getText(buf)); + + /** + * Kill any node... + */ + FailRep * const failRep = (FailRep *)&signal->theData[0]; + failRep->failCause = FailRep::ZMULTI_NODE_SHUTDOWN; + NodeReceiverGroup rg(QMGR, c_clusterNodes); + Uint32 nodeId = 0; + while ((nodeId = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, nodeId+1)) + != NdbNodeBitmask::NotFound) + { + failRep->failNodeId = nodeId; + sendSignal(rg, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA); + } + c_stopRec.m_state = StopRecord::SR_WAIT_NODE_FAILURES; + return; } - return; } void Ndbcntr::execSTTORRY(Signal* signal){ diff --git a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index 5b69a0a921c..d1a3c46cca5 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -33,6 +33,7 @@ #include <SafeCounter.hpp> #include <RequestTracker.hpp> +#include <signaldata/StopReq.hpp> #include "timer.hpp" @@ -53,6 +54,7 @@ #define ZAPI_HB_HANDLING 3 #define ZTIMER_HANDLING 4 #define ZARBIT_HANDLING 5 +#define ZSTART_FAILURE_LIMIT 6 /* Error Codes ------------------------------*/ #define ZERRTOOMANY 1101 @@ -104,18 +106,42 @@ public: }; struct StartRecord { - void reset(){ m_startKey++; m_startNode = 0;} + void reset(){ + m_startKey++; + m_startNode = 0; + m_gsn = RNIL; + m_nodes.clearWaitingFor(); + } Uint32 m_startKey; Uint32 m_startNode; Uint64 m_startTimeout; Uint32 m_gsn; SignalCounter m_nodes; - } c_start; + Uint32 m_latest_gci; + + Uint32 m_start_type; + NdbNodeBitmask m_skip_nodes; + NdbNodeBitmask m_starting_nodes; + NdbNodeBitmask m_starting_nodes_w_log; + Uint16 m_president_candidate; + Uint32 m_president_candidate_gci; + Uint16 m_regReqReqSent; + Uint16 m_regReqReqRecv; + } c_start; + NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NodeBitmask c_connectedNodes; // All kinds of connected nodes + + /** + * Nodes which we're checking for partitioned cluster + * + * i.e. nodes that connect to use, when we already have elected president + */ + NdbNodeBitmask c_readnodes_nodes; + Uint32 c_maxDynamicId; // Records @@ -208,6 +234,7 @@ private: void execPRES_TOCONF(Signal* signal); void execDISCONNECT_REP(Signal* signal); void execSYSTEM_ERROR(Signal* signal); + void execSTOP_REQ(Signal* signal); // Received signals void execDUMP_STATE_ORD(Signal* signal); @@ -222,7 +249,12 @@ private: void execREAD_NODESREQ(Signal* signal); void execSET_VAR_REQ(Signal* signal); + void execREAD_NODESREF(Signal* signal); + void execREAD_NODESCONF(Signal* signal); + void execDIH_RESTARTREF(Signal* signal); + void execDIH_RESTARTCONF(Signal* signal); + void execAPI_VERSION_REQ(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal); @@ -244,6 +276,9 @@ private: void execARBIT_STOPREP(Signal* signal); // Statement blocks + void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); + Uint32 check_startup(Signal* signal); + void node_failed(Signal* signal, Uint16 aFailedNode); void checkStartInterface(Signal* signal); void failReport(Signal* signal, @@ -261,8 +296,9 @@ private: // Generated statement blocks void startphase1(Signal* signal); - void electionWon(); + void electionWon(Signal* signal); void cmInfoconf010Lab(Signal* signal); + void apiHbHandlingLab(Signal* signal); void timerHandlingLab(Signal* signal); void hbReceivedLab(Signal* signal); @@ -364,12 +400,12 @@ private: /* Status flags ----------------------------------*/ Uint32 c_restartPartialTimeout; + Uint32 c_restartPartionedTimeout; + Uint32 c_restartFailureTimeout; + Uint64 c_start_election_time; Uint16 creadyDistCom; - Uint16 c_regReqReqSent; - Uint16 c_regReqReqRecv; - Uint64 c_stopElectionTime; - Uint16 cpresidentCandidate; + Uint16 cdelayRegreq; Uint16 cpresidentAlive; Uint16 cnoFailedNodes; @@ -406,6 +442,9 @@ private: }; struct OpAllocNodeIdReq opAllocNodeIdReq; + + StopReq c_stopReq; + void check_multi_node_shutdown(Signal* signal); }; #endif diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index 5d964acc016..6ee24561b0a 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -35,9 +35,8 @@ void Qmgr::initData() Uint32 hbDBAPI = 500; setHbApiDelay(hbDBAPI); - - c_connectedNodes.clear(); c_connectedNodes.set(getOwnNodeId()); + c_stopReq.senderRef = 0; }//Qmgr::initData() void Qmgr::initRecords() @@ -52,6 +51,7 @@ Qmgr::Qmgr(Block_context& ctx) // Transit signals addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD); + addRecSignal(GSN_STOP_REQ, &Qmgr::execSTOP_REQ); addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG); addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB); addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT); @@ -101,6 +101,12 @@ Qmgr::Qmgr(Block_context& ctx) addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF); addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP); + addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF); + addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF); + + addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF); + addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF); + initData(); }//Qmgr::Qmgr() diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 69447745757..ea15bf971f4 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -56,6 +56,33 @@ #define DEBUG_START3(signal, msg) #endif +/** + * c_start.m_gsn = GSN_CM_REGREQ + * Possible for all nodes + * c_start.m_nodes contains all nodes in config + * + * c_start.m_gsn = GSN_CM_NODEINFOREQ; + * Set when receiving CM_REGCONF + * State possible for starting node only (not in cluster) + * + * c_start.m_nodes contains all node in alive cluster that + * that has not replied to GSN_CM_NODEINFOREQ + * passed by president in GSN_CM_REGCONF + * + * c_start.m_gsn = GSN_CM_ADD + * Possible for president only + * Set when receiving and accepting CM_REGREQ (to include node) + * + * c_start.m_nodes contains all nodes in alive cluster + starting node + * that has not replied to GSN_CM_ADD + * by sending GSN_CM_ACKADD + * + * c_start.m_gsn = GSN_CM_NODEINFOCONF + * Possible for non presidents only + * c_start.m_nodes contains a node that has been accepted by president + * but has not connected to us yet + */ + // Signal entries and statement blocks /* 4 P R O G R A M */ /*******************************/ @@ -119,6 +146,30 @@ void Qmgr::execCONTINUEB(Signal* signal) runArbitThread(signal); return; break; + case ZSTART_FAILURE_LIMIT:{ + if (cpresident != ZNIL) + { + jam(); + return; + } + Uint64 now = NdbTick_CurrentMillisecond(); + if (now > (c_start_election_time + c_restartFailureTimeout)) + { + jam(); + BaseString tmp; + tmp.append("Shutting down node as total restart time exceeds " + " StartFailureTimeout as set in config file "); + if(c_restartFailureTimeout == ~0) + tmp.append(" 0 (inifinite)"); + else + tmp.appfmt(" %d", c_restartFailureTimeout); + + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str()); + } + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + return; + } default: jam(); // ZCOULD_NOT_OCCUR_ERROR; @@ -246,14 +297,28 @@ void Qmgr::startphase1(Signal* signal) nodePtr.p->phase = ZSTARTING; nodePtr.p->blockRef = reference(); c_connectedNodes.set(nodePtr.i); + + signal->theData[0] = reference(); + sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB); + return; +} - signal->theData[0] = 0; // no answer - signal->theData[1] = 0; // no id - signal->theData[2] = NodeInfo::DB; - sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); +void +Qmgr::execDIH_RESTARTREF(Signal*signal) +{ + jamEntry(); + c_start.m_latest_gci = 0; + execCM_INFOCONF(signal); +} + +void +Qmgr::execDIH_RESTARTCONF(Signal*signal) +{ + jamEntry(); + + c_start.m_latest_gci = signal->theData[1]; execCM_INFOCONF(signal); - return; } void Qmgr::setHbDelay(UintR aHbDelay) @@ -280,18 +345,24 @@ void Qmgr::execCONNECT_REP(Signal* signal) { jamEntry(); const Uint32 nodeId = signal->theData[0]; + + if (ERROR_INSERTED(931)) + { + jam(); + ndbout_c("Discarding CONNECT_REP(%d)", nodeId); + infoEvent("Discarding CONNECT_REP(%d)", nodeId); + return; + } + c_connectedNodes.set(nodeId); NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); switch(nodePtr.p->phase){ - case ZSTARTING: case ZRUNNING: + ndbrequire(!c_clusterNodes.get(nodeId)); + case ZSTARTING: jam(); - if(!c_start.m_nodes.isWaitingFor(nodeId)){ - jam(); - return; - } break; case ZPREPARE_FAIL: case ZFAIL_CLOSING: @@ -303,59 +374,126 @@ void Qmgr::execCONNECT_REP(Signal* signal) case ZAPI_INACTIVE: return; } - + + if (getNodeInfo(nodeId).getType() != NodeInfo::DB) + { + jam(); + return; + } + switch(c_start.m_gsn){ case GSN_CM_REGREQ: jam(); sendCmRegReq(signal, nodeId); + + /** + * We're waiting for CM_REGCONF c_start.m_nodes contains all configured + * nodes + */ + ndbrequire(nodePtr.p->phase == ZSTARTING); + ndbrequire(c_start.m_nodes.isWaitingFor(nodeId)); return; case GSN_CM_NODEINFOREQ: jam(); - sendCmNodeInfoReq(signal, nodeId, nodePtr.p); + + if (c_start.m_nodes.isWaitingFor(nodeId)) + { + jam(); + ndbrequire(getOwnNodeId() != cpresident); + ndbrequire(nodePtr.p->phase == ZSTARTING); + sendCmNodeInfoReq(signal, nodeId, nodePtr.p); + return; + } return; - case GSN_CM_ADD:{ + case GSN_CM_NODEINFOCONF:{ jam(); - - ndbrequire(getOwnNodeId() != cpresident); - c_start.m_nodes.clearWaitingFor(nodeId); - c_start.m_gsn = RNIL; - NodeRecPtr addNodePtr; - addNodePtr.i = nodeId; - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - cmAddPrepare(signal, addNodePtr, nodePtr.p); - return; + ndbrequire(getOwnNodeId() != cpresident); + ndbrequire(nodePtr.p->phase == ZRUNNING); + if (c_start.m_nodes.isWaitingFor(nodeId)) + { + jam(); + c_start.m_nodes.clearWaitingFor(nodeId); + c_start.m_gsn = RNIL; + + NodeRecPtr addNodePtr; + addNodePtr.i = nodeId; + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); + cmAddPrepare(signal, addNodePtr, nodePtr.p); + return; + } } default: - return; + (void)1; } + + ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId)); + ndbrequire(!c_readnodes_nodes.get(nodeId)); + c_readnodes_nodes.set(nodeId); + signal->theData[0] = reference(); + sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA); return; }//Qmgr::execCONNECT_REP() +void +Qmgr::execREAD_NODESCONF(Signal* signal) +{ + check_readnodes_reply(signal, + refToNode(signal->getSendersBlockRef()), + GSN_READ_NODESCONF); +} + +void +Qmgr::execREAD_NODESREF(Signal* signal) +{ + check_readnodes_reply(signal, + refToNode(signal->getSendersBlockRef()), + GSN_READ_NODESREF); +} + /*******************************/ /* CM_INFOCONF */ /*******************************/ void Qmgr::execCM_INFOCONF(Signal* signal) { + /** + * Open communcation to all DB nodes + */ + signal->theData[0] = 0; // no answer + signal->theData[1] = 0; // no id + signal->theData[2] = NodeInfo::DB; + sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); + cpresident = ZNIL; - cpresidentCandidate = getOwnNodeId(); cpresidentAlive = ZFALSE; - c_stopElectionTime = NdbTick_CurrentMillisecond(); - c_stopElectionTime += c_restartPartialTimeout; + c_start_election_time = NdbTick_CurrentMillisecond(); + + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + cmInfoconf010Lab(signal); return; }//Qmgr::execCM_INFOCONF() +Uint32 g_start_type = 0; +NdbNodeBitmask g_nowait_nodes; // Set by clo + void Qmgr::cmInfoconf010Lab(Signal* signal) { c_start.m_startKey = 0; c_start.m_startNode = getOwnNodeId(); c_start.m_nodes.clearWaitingFor(); c_start.m_gsn = GSN_CM_REGREQ; + c_start.m_starting_nodes.clear(); + c_start.m_starting_nodes_w_log.clear(); + c_start.m_regReqReqSent = 0; + c_start.m_regReqReqRecv = 0; + c_start.m_skip_nodes = g_nowait_nodes; + c_start.m_skip_nodes.bitAND(c_definedNodes); + c_start.m_start_type = g_start_type; NodeRecPtr nodePtr; - c_regReqReqSent = c_regReqReqRecv = 0; cnoOfNodes = 0; for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); @@ -390,14 +528,18 @@ void Qmgr::cmInfoconf010Lab(Signal* signal) void Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ - c_regReqReqSent++; - CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0]; - cmRegReq->blockRef = reference(); - cmRegReq->nodeId = getOwnNodeId(); - cmRegReq->version = NDB_VERSION; + CmRegReq * req = (CmRegReq *)&signal->theData[0]; + req->blockRef = reference(); + req->nodeId = getOwnNodeId(); + req->version = NDB_VERSION; + req->latest_gci = c_start.m_latest_gci; + req->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes); const Uint32 ref = calcQmgrBlockRef(nodeId); sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB); DEBUG_START(GSN_CM_REGREQ, nodeId, ""); + + c_start.m_regReqReqSent++; } /* @@ -437,6 +579,18 @@ Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ /*******************************/ /* CM_REGREQ */ /*******************************/ +static +int +check_start_type(Uint32 starting, Uint32 own) +{ + if (starting == (1 << NodeState::ST_INITIAL_START) && + ((own & (1 << NodeState::ST_INITIAL_START)) == 0)) + { + return 1; + } + return 0; +} + void Qmgr::execCM_REGREQ(Signal* signal) { DEBUG_START3(signal, ""); @@ -448,6 +602,17 @@ void Qmgr::execCM_REGREQ(Signal* signal) const BlockReference Tblockref = cmRegReq->blockRef; const Uint32 startingVersion = cmRegReq->version; addNodePtr.i = cmRegReq->nodeId; + Uint32 gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; + + if (signal->getLength() == CmRegReq::SignalLength) + { + jam(); + gci = cmRegReq->latest_gci; + start_type = cmRegReq->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes); + } if (creadyDistCom == ZFALSE) { jam(); @@ -461,11 +626,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - - if (cpresident != getOwnNodeId()){ + if (check_start_type(start_type, c_start.m_start_type)) + { + jam(); + sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE); + return; + } + + if (cpresident != getOwnNodeId()) + { jam(); - if (cpresident == ZNIL) { + + if (cpresident == ZNIL) + { /*** * We don't know the president. * If the node to be added has lower node id @@ -473,13 +646,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) * candidate */ jam(); - if (addNodePtr.i < cpresidentCandidate) { + if (gci > c_start.m_president_candidate_gci || + (gci == c_start.m_president_candidate_gci && + addNodePtr.i < c_start.m_president_candidate)) + { jam(); - cpresidentCandidate = addNodePtr.i; - }//if + c_start.m_president_candidate = addNodePtr.i; + c_start.m_president_candidate_gci = gci; + ndbout_c("assign candidate: %u %u", addNodePtr.i, gci); + } sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION); return; - } + } + /** * We are not the president. * We know the president. @@ -489,7 +668,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (c_start.m_startNode != 0){ + if (c_start.m_startNode != 0) + { jam(); /** * President busy by adding another node @@ -498,7 +678,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (ctoStatus == Q_ACTIVE) { + if (ctoStatus == Q_ACTIVE) + { jam(); /** * Active taking over as president @@ -507,7 +688,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) { + if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) + { jam(); /** * The new node is not in config file @@ -516,13 +698,15 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); Phase phase = addNodePtr.p->phase; - if (phase != ZINIT){ + if (phase != ZINIT) + { jam(); DEBUG("phase = " << phase); sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD); return; - }//if + } jam(); /** @@ -594,7 +778,12 @@ void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef, ref->blockRef = reference(); ref->nodeId = getOwnNodeId(); ref->errorCode = Terror; - ref->presidentCandidate = (cpresident == ZNIL ? cpresidentCandidate : cpresident); + ref->presidentCandidate = + (cpresident == ZNIL ? c_start.m_president_candidate : cpresident); + ref->candidate_latest_gci = c_start.m_president_candidate_gci; + ref->latest_gci = c_start.m_latest_gci; + ref->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes); sendSignal(TBRef, GSN_CM_REGREF, signal, CmRegRef::SignalLength, JBB); DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), ""); @@ -622,22 +811,33 @@ void Qmgr::execCM_REGCONF(Signal* signal) jamEntry(); const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0]; + Uint32 presidentNodeId = cmRegConf->presidentNodeId; if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) { jam(); char buf[128]; - BaseString::snprintf(buf,sizeof(buf),"incompatible version own=0x%x other=0x%x, shutting down", NDB_VERSION, cmRegConf->presidentVersion); + BaseString::snprintf(buf,sizeof(buf), + "incompatible version own=0x%x other=0x%x, " + " shutting down", + NDB_VERSION, cmRegConf->presidentVersion); systemErrorLab(signal, __LINE__, buf); return; } - + myNodePtr.i = getOwnNodeId(); + ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); + + ndbrequire(c_start.m_gsn == GSN_CM_REGREQ); + ndbrequire(myNodePtr.p->phase = ZSTARTING); + cpdistref = cmRegConf->presidentBlockRef; cpresident = cmRegConf->presidentNodeId; UintR TdynamicId = cmRegConf->dynamicId; c_maxDynamicId = TdynamicId; c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes); + myNodePtr.p->ndynamicId = TdynamicId; + /*--------------------------------------------------------------*/ // Send this as an EVENT REPORT to inform about hearing about // other NDB node proclaiming to be president. @@ -648,10 +848,6 @@ void Qmgr::execCM_REGCONF(Signal* signal) signal->theData[3] = TdynamicId; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - myNodePtr.i = getOwnNodeId(); - ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); - myNodePtr.p->ndynamicId = TdynamicId; - for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); if (c_clusterNodes.get(nodePtr.i)){ @@ -675,6 +871,84 @@ void Qmgr::execCM_REGCONF(Signal* signal) }//Qmgr::execCM_REGCONF() void +Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn) +{ + NodeRecPtr myNodePtr; + myNodePtr.i = getOwnNodeId(); + ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); + + NodeRecPtr nodePtr; + nodePtr.i = nodeId; + ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); + + ndbrequire(c_readnodes_nodes.get(nodeId)); + ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr(); + if (gsn == GSN_READ_NODESREF) + { + jam(); +retry: + signal->theData[0] = reference(); + sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA); + return; + } + + if (conf->masterNodeId == ZNIL) + { + jam(); + goto retry; + } + + Uint32 president = conf->masterNodeId; + if (president == cpresident) + { + jam(); + c_readnodes_nodes.clear(nodeId); + return; + } + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), + "Partitioned cluster! check StartPartialTimeout, " + " node %d thinks %d is president, " + " I think president is: %d", + nodeId, president, cpresident); + + ndbout_c(buf); + CRASH_INSERTION(933); + + if (getNodeState().startLevel == NodeState::SL_STARTED) + { + jam(); + NdbNodeBitmask part; + part.assign(NdbNodeBitmask::Size, conf->clusterNodes); + FailRep* rep = (FailRep*)signal->getDataPtrSend(); + rep->failCause = FailRep::ZPARTITIONED_CLUSTER; + rep->president = cpresident; + c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partition); + Uint32 ref = calcQmgrBlockRef(nodeId); + Uint32 i = 0; + while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound) + { + if (i == nodeId) + continue; + rep->failNodeId = i; + sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA); + } + rep->failNodeId = nodeId; + sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBB); + return; + } + + CRASH_INSERTION(932); + + progError(__LINE__, + NDBD_EXIT_ARBIT_SHUTDOWN, + buf); + + ndbrequire(false); +} + +void Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend(); req->nodeId = getOwnNodeId(); @@ -703,26 +977,105 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ /*******************************/ /* CM_REGREF */ /*******************************/ +static +const char * +get_start_type_string(Uint32 st) +{ + static char buf[256]; + + if (st == 0) + { + return "<ANY>"; + } + else + { + buf[0] = 0; + for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++) + { + if (st & (1 << i)) + { + if (buf[0]) + strcat(buf, "/"); + switch(i){ + case NodeState::ST_INITIAL_START: + strcat(buf, "inital start"); + break; + case NodeState::ST_SYSTEM_RESTART: + strcat(buf, "system restart"); + break; + case NodeState::ST_NODE_RESTART: + strcat(buf, "node restart"); + break; + case NodeState::ST_INITIAL_NODE_RESTART: + strcat(buf, "initial node restart"); + break; + } + } + } + return buf; + } +} + void Qmgr::execCM_REGREF(Signal* signal) { jamEntry(); - c_regReqReqRecv++; - // Ignore block reference in data[0] - UintR TaddNodeno = signal->theData[1]; - UintR TrefuseReason = signal->theData[2]; - Uint32 candidate = signal->theData[3]; + CmRegRef* ref = (CmRegRef*)signal->getDataPtr(); + UintR TaddNodeno = ref->nodeId; + UintR TrefuseReason = ref->errorCode; + Uint32 candidate = ref->presidentCandidate; + Uint32 node_gci = 1; + Uint32 candidate_gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; DEBUG_START3(signal, TrefuseReason); - - if(candidate != cpresidentCandidate){ + + if (signal->getLength() == CmRegRef::SignalLength) + { jam(); - c_regReqReqRecv = ~0; + node_gci = ref->latest_gci; + candidate_gci = ref->candidate_latest_gci; + start_type = ref->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes); } + + c_start.m_regReqReqRecv++; + // Ignore block reference in data[0] + + if(candidate != c_start.m_president_candidate) + { + jam(); + c_start.m_regReqReqRecv = ~0; + } + + c_start.m_starting_nodes.set(TaddNodeno); + if (node_gci) + { + jam(); + c_start.m_starting_nodes_w_log.set(TaddNodeno); + } + + skip_nodes.bitAND(c_definedNodes); + c_start.m_skip_nodes.bitOR(skip_nodes); + + char buf[100]; switch (TrefuseReason) { case CmRegRef::ZINCOMPATIBLE_VERSION: jam(); - systemErrorLab(signal, __LINE__, "incompatible version, connection refused by running ndb node"); + systemErrorLab(signal, __LINE__, + "incompatible version, " + "connection refused by running ndb node"); + case CmRegRef::ZINCOMPATIBLE_START_TYPE: + jam(); + BaseString::snprintf(buf, sizeof(buf), + "incompatible start type detected: node %d" + " reports %s(%d) my start type: %s(%d)", + TaddNodeno, + get_start_type_string(start_type), start_type, + get_start_type_string(c_start.m_start_type), + c_start.m_start_type); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); break; case CmRegRef::ZBUSY: case CmRegRef::ZBUSY_TO_PRES: @@ -741,14 +1094,19 @@ void Qmgr::execCM_REGREF(Signal* signal) break; case CmRegRef::ZELECTION: jam(); - if (cpresidentCandidate > TaddNodeno) { + if (candidate_gci > c_start.m_president_candidate_gci || + (candidate_gci == c_start.m_president_candidate_gci && + candidate < c_start.m_president_candidate)) + { jam(); //---------------------------------------- /* We may already have a candidate */ /* choose the lowest nodeno */ //---------------------------------------- signal->theData[3] = 2; - cpresidentCandidate = TaddNodeno; + c_start.m_president_candidate = candidate; + c_start.m_president_candidate_gci = candidate_gci; + ndbout_c("assign candidate: %u %u", candidate, candidate_gci); } else { signal->theData[3] = 4; }//if @@ -776,32 +1134,34 @@ void Qmgr::execCM_REGREF(Signal* signal) //----------------------------------------- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - if(cpresidentAlive == ZTRUE){ + if(cpresidentAlive == ZTRUE) + { jam(); - DEBUG(""); + DEBUG("cpresidentAlive"); return; } - if(c_regReqReqSent != c_regReqReqRecv){ + if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv) + { jam(); - DEBUG( c_regReqReqSent << " != " << c_regReqReqRecv); + DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv); return; } - if(cpresidentCandidate != getOwnNodeId()){ + if(c_start.m_president_candidate != getOwnNodeId()) + { jam(); - DEBUG(""); + DEBUG("i'm not the candidate"); return; } - + /** - * All configured nodes has agreed + * All connected nodes has agreed */ - Uint64 now = NdbTick_CurrentMillisecond(); - if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){ + if(check_startup(signal)) + { jam(); - electionWon(); - sendSttorryLab(signal); + electionWon(signal); /** * Start timer handling @@ -813,8 +1173,192 @@ void Qmgr::execCM_REGREF(Signal* signal) return; }//Qmgr::execCM_REGREF() +Uint32 +Qmgr::check_startup(Signal* signal) +{ + Uint64 now = NdbTick_CurrentMillisecond(); + Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout; + Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout; + + /** + * First see if we should wait more... + */ + NdbNodeBitmask tmp; + tmp.bitOR(c_start.m_skip_nodes); + tmp.bitOR(c_start.m_starting_nodes); + + NdbNodeBitmask wait; + wait.assign(c_definedNodes); + wait.bitANDC(tmp); + + Uint32 retVal = 0; + NdbNodeBitmask report_mask; + + if ((c_start.m_latest_gci == 0) || + (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START))) + { + if (!tmp.equal(c_definedNodes)) + { + jam(); + signal->theData[1] = 1; + signal->theData[2] = ~0; + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + else + { + jam(); + signal->theData[1] = 0x8000; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + const bool all = c_start.m_starting_nodes.equal(c_definedNodes); + CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; + + { + /** + * Check for missing node group directly + */ + char buf[100]; + NdbNodeBitmask check; + check.assign(c_definedNodes); + check.bitANDC(c_start.m_starting_nodes); // Not connected nodes + check.bitOR(c_start.m_starting_nodes_w_log); + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = check; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + if (sd->output == CheckNodeGroups::Lose) + { + jam(); + goto missing_nodegroup; + } + } + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result = sd->output; + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes_w_log; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result_w_log = sd->output; + + if (tmp.equal(c_definedNodes)) + { + /** + * All nodes (wrt no-wait nodes) has connected... + * this means that we will now start or die + */ + jam(); + switch(result_w_log){ + case CheckNodeGroups::Lose: + { + jam(); + goto missing_nodegroup; + } + case CheckNodeGroups::Win: + signal->theData[1] = all ? 0x8001 : 0x8002; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + case CheckNodeGroups::Partitioning: + ndbrequire(result != CheckNodeGroups::Lose); + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + + if (now < partial_timeout) + { + jam(); + signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; + signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + + /** + * Start partial has passed...check for partitioning... + */ + switch(result_w_log){ + case CheckNodeGroups::Lose: + jam(); + goto missing_nodegroup; + case CheckNodeGroups::Partitioning: + if (now < partitioned_timeout && result != CheckNodeGroups::Win) + { + signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + } + // Fall through... + case CheckNodeGroups::Win: + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + + ndbrequire(false); + +start_report: + jam(); + { + Uint32 sz = NdbNodeBitmask::Size; + signal->theData[0] = NDB_LE_StartReport; + signal->theData[3] = sz; + Uint32* ptr = signal->theData+4; + c_definedNodes.copyto(sz, ptr); ptr += sz; + c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz; + c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz; + report_mask.copyto(sz, ptr); ptr+= sz; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, + 4+4*NdbNodeBitmask::Size, JBB); + } + return retVal; + +missing_nodegroup: + jam(); + char buf[100], mask1[100], mask2[100]; + c_start.m_starting_nodes.getText(mask1); + tmp.assign(c_start.m_starting_nodes); + tmp.bitANDC(c_start.m_starting_nodes_w_log); + tmp.getText(mask2); + BaseString::snprintf(buf, sizeof(buf), + "Unable to start missing node group! " + " starting: %s (missing fs for: %s)", + mask1, mask2); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); +} + void -Qmgr::electionWon(){ +Qmgr::electionWon(Signal* signal){ NodeRecPtr myNodePtr; cpresident = getOwnNodeId(); /* This node becomes president. */ myNodePtr.i = getOwnNodeId(); @@ -831,8 +1375,21 @@ Qmgr::electionWon(){ c_clusterNodes.set(getOwnNodeId()); cpresidentAlive = ZTRUE; - c_stopElectionTime = ~0; + c_start_election_time = ~0; c_start.reset(); + + signal->theData[0] = NDB_LE_CM_REGCONF; + signal->theData[1] = getOwnNodeId(); + signal->theData[2] = cpresident; + signal->theData[3] = 1; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); + + c_start.m_starting_nodes.clear(getOwnNodeId()); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } } /* @@ -846,7 +1403,15 @@ Qmgr::electionWon(){ /*--------------------------------------------------------------*/ void Qmgr::regreqTimeLimitLab(Signal* signal) { - if(cpresident == ZNIL){ + if(cpresident == ZNIL) + { + if (c_start.m_president_candidate == ZNIL) + { + jam(); + c_start.m_president_candidate = getOwnNodeId(); + ndbout_c("Assigning candidate to self: %d", getOwnNodeId()); + } + cmInfoconf010Lab(signal); } }//Qmgr::regreqTimelimitLab() @@ -967,7 +1532,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){ ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD); c_start.m_nodes.clearWaitingFor(); c_start.m_nodes.setWaitingFor(nodePtr.i); - c_start.m_gsn = GSN_CM_ADD; + c_start.m_gsn = GSN_CM_NODEINFOCONF; #else warningEvent("Enabling communication to CM_ADD node %u state=%d", nodePtr.i, @@ -1256,6 +1821,17 @@ void Qmgr::execCM_ACKADD(Signal* signal) */ handleArbitNdbAdd(signal, addNodePtr.i); c_start.reset(); + + if (c_start.m_starting_nodes.get(addNodePtr.i)) + { + jam(); + c_start.m_starting_nodes.clear(addNodePtr.i); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } + } return; }//switch ndbrequire(false); @@ -1409,7 +1985,8 @@ void Qmgr::initData(Signal* signal) cnoPrepFailedNodes = 0; creadyDistCom = ZFALSE; cpresident = ZNIL; - cpresidentCandidate = ZNIL; + c_start.m_president_candidate = ZNIL; + c_start.m_president_candidate_gci = 0; cpdistref = 0; cneighbourh = ZNIL; cneighbourl = ZNIL; @@ -1437,15 +2014,33 @@ void Qmgr::initData(Signal* signal) Uint32 hbDBAPI = 1500; Uint32 arbitTimeout = 1000; c_restartPartialTimeout = 30000; + c_restartPartionedTimeout = 60000; + c_restartFailureTimeout = ~0; ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB); ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI); ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout); ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT, &c_restartPartialTimeout); - if(c_restartPartialTimeout == 0){ + ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT, + &c_restartPartionedTimeout); + ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT, + &c_restartFailureTimeout); + + if(c_restartPartialTimeout == 0) + { c_restartPartialTimeout = ~0; } + if (c_restartPartionedTimeout ==0) + { + c_restartPartionedTimeout = ~0; + } + + if (c_restartFailureTimeout == 0) + { + c_restartFailureTimeout = ~0; + } + setHbDelay(hbDBDB); setHbApiDelay(hbDBAPI); setArbitTimeout(arbitTimeout); @@ -1872,10 +2467,23 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) const Uint32 nodeId = rep->nodeId; const Uint32 err = rep->err; c_connectedNodes.clear(nodeId); - + c_readnodes_nodes.clear(nodeId); + NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); + + char buf[100]; + if (getNodeInfo(nodeId).getType() == NodeInfo::DB && + getNodeState().startLevel < NodeState::SL_STARTED) + { + jam(); + CRASH_INSERTION(932); + BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); + ndbrequire(false); + } + switch(nodePtr.p->phase){ case ZRUNNING: jam(); @@ -1893,9 +2501,12 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) case ZAPI_ACTIVE: ndbrequire(false); case ZAPI_INACTIVE: + { + BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } - + } node_failed(signal, nodeId); }//DISCONNECT_REP @@ -2153,10 +2764,16 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, failedNodePtr.i = aFailedNode; ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); + FailRep* rep = (FailRep*)signal->getDataPtr(); + + check_multi_node_shutdown(signal); + if (failedNodePtr.i == getOwnNodeId()) { jam(); + Uint32 code = 0; const char * msg = 0; + char extra[100]; switch(aFailCause){ case FailRep::ZOWN_FAILURE: msg = "Own failure"; @@ -2177,17 +2794,51 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, case FailRep::ZLINK_FAILURE: msg = "Connection failure"; break; + case FailRep::ZPARTITIONED_CLUSTER: + { + code = NDBD_EXIT_ARBIT_SHUTDOWN; + char buf1[100], buf2[100]; + c_clusterNodes.getText(buf1); + if (signal->getLength()== FailRep::SignalLength + FailRep::ExtraLength && + signal->header.theVerId_signalNumber == GSN_FAIL_REP) + { + jam(); + NdbNodeBitmask part; + part.assign(NdbNodeBitmask::Size, rep->partition); + part.getText(buf2); + BaseString::snprintf(extra, sizeof(extra), + "Partitioned cluster!" + " Our cluster: %s other cluster: %s", + buf1, buf2); + } + else + { + jam(); + BaseString::snprintf(extra, sizeof(extra), + "Partitioned cluster!" + " Our cluster: %s ", buf1); + } + msg = extra; + break; + } + case FailRep::ZMULTI_NODE_SHUTDOWN: + msg = "Multi node shutdown"; + break; + default: + msg = "<UNKNOWN>"; } - char buf[100]; - BaseString::snprintf(buf, 100, + CRASH_INSERTION(932); + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), "We(%u) have been declared dead by %u reason: %s(%u)", getOwnNodeId(), refToNode(signal->getSendersBlockRef()), aFailCause, msg ? msg : "<Unknown>"); - - progError(__LINE__, 0, buf); + + progError(__LINE__, code, buf); return; }//if @@ -2244,7 +2895,9 @@ void Qmgr::execPREP_FAILREQ(Signal* signal) { NodeRecPtr myNodePtr; jamEntry(); - + + check_multi_node_shutdown(signal); + PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0]; BlockReference Tblockref = prepFail->xxxBlockRef; @@ -3896,6 +4549,7 @@ Qmgr::stateArbitCrash(Signal* signal) if (! (arbitRec.getTimediff() > getArbitTimeout())) return; #endif + CRASH_INSERTION(932); progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN, "Arbitrator decided to shutdown this node"); } @@ -3959,8 +4613,10 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal) case 1: infoEvent("creadyDistCom = %d, cpresident = %d\n", creadyDistCom, cpresident); - infoEvent("cpresidentAlive = %d, cpresidentCand = %d\n", - cpresidentAlive, cpresidentCandidate); + infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n", + cpresidentAlive, + c_start.m_president_candidate, + c_start.m_president_candidate_gci); infoEvent("ctoStatus = %d\n", ctoStatus); for(Uint32 i = 1; i<MAX_NDB_NODES; i++){ if(getNodeInfo(i).getType() == NodeInfo::DB){ @@ -4221,3 +4877,41 @@ Qmgr::completeAllocNodeIdReq(Signal *signal) sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal, AllocNodeIdConf::SignalLength, JBB); } + +void +Qmgr::execSTOP_REQ(Signal* signal) +{ + jamEntry(); + c_stopReq = * (StopReq*)signal->getDataPtr(); + + if (c_stopReq.senderRef) + { + jam(); + ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId())); + + StopConf *conf = (StopConf*)signal->getDataPtrSend(); + conf->senderData = c_stopReq.senderData; + conf->nodeState = getOwnNodeId(); + sendSignal(c_stopReq.senderRef, + GSN_STOP_CONF, signal, StopConf::SignalLength, JBA); + } +} + +void +Qmgr::check_multi_node_shutdown(Signal* signal) +{ + if (c_stopReq.senderRef && + NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId())) + { + jam(); + if(StopReq::getPerformRestart(c_stopReq.requestInfo)) + { + jam(); + StartOrd * startOrd = (StartOrd *)&signal->theData[0]; + startOrd->restartInfo = c_stopReq.requestInfo; + EXECUTE_DIRECT(CMVMI, GSN_START_ORD, signal, 2); + } else { + EXECUTE_DIRECT(CMVMI, GSN_STOP_ORD, signal, 1); + } + } +} diff --git a/storage/ndb/src/kernel/vm/Configuration.cpp b/storage/ndb/src/kernel/vm/Configuration.cpp index 5f3b601023f..227ad6d6893 100644 --- a/storage/ndb/src/kernel/vm/Configuration.cpp +++ b/storage/ndb/src/kernel/vm/Configuration.cpp @@ -55,6 +55,12 @@ enum ndbd_options { NDB_STD_OPTS_VARS; // XXX should be my_bool ??? static int _daemon, _no_daemon, _foreground, _initial, _no_start; +static int _initialstart; +static const char* _nowait_nodes; + +extern Uint32 g_start_type; +extern NdbNodeBitmask g_nowait_nodes; + /** * Arguments to NDB process */ @@ -82,6 +88,14 @@ static struct my_option my_long_options[] = " (implies --nodaemon)", (gptr*) &_foreground, (gptr*) &_foreground, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "nowait-nodes", NO_ARG, + "Nodes that will not be waited for during start", + (gptr*) &_nowait_nodes, (gptr*) &_nowait_nodes, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + { "initial-start", NO_ARG, + "Perform initial start", + (gptr*) &_initialstart, (gptr*) &_initialstart, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; static void short_usage_sub(void) @@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv) globalData.ownId= 0; + if (_nowait_nodes) + { + BaseString str(_nowait_nodes); + Vector<BaseString> arr; + str.split(arr, ","); + for (Uint32 i = 0; i<arr.size(); i++) + { + char *endptr = 0; + long val = strtol(arr[i].c_str(), &endptr, 10); + if (*endptr) + { + ndbout_c("Unable to parse nowait-nodes argument: %s : %s", + arr[i].c_str(), _nowait_nodes); + exit(-1); + } + if (! (val > 0 && val < MAX_NDB_NODES)) + { + ndbout_c("Invalid nodeid specified in nowait-nodes: %d : %s", + val, _nowait_nodes); + exit(-1); + } + g_nowait_nodes.set(val); + } + } + + if (_initialstart) + { + _initialStart = true; + g_start_type |= (1 << NodeState::ST_INITIAL_START); + } + return true; } diff --git a/storage/ndb/test/ndbapi/testNodeRestart.cpp b/storage/ndb/test/ndbapi/testNodeRestart.cpp index ba195290e9e..aed0b39f196 100644 --- a/storage/ndb/test/ndbapi/testNodeRestart.cpp +++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp @@ -22,7 +22,7 @@ #include <NdbRestarts.hpp> #include <Vector.hpp> #include <signaldata/DumpStateOrd.hpp> - +#include <Bitmask.hpp> int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ @@ -669,6 +669,206 @@ err: return NDBT_FAILED; } +int +runBug18612(NDBT_Context* ctx, NDBT_Step* step){ + + // Assume two replicas + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Uint32 cnt = restarter.getNumDbNodes(); + + for(int loop = 0; loop < ctx->getNumLoops(); loop++) + { + int partition0[256]; + int partition1[256]; + bzero(partition0, sizeof(partition0)); + bzero(partition1, sizeof(partition1)); + Bitmask<4> nodesmask; + + Uint32 node1 = restarter.getDbNodeId(rand()%cnt); + for (Uint32 i = 0; i<cnt/2; i++) + { + do { + int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand()); + if (tmp == -1) + break; + node1 = tmp; + } while(nodesmask.get(node1)); + + partition0[i] = node1; + partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + ndbout_c("nodes %d %d", node1, partition1[i]); + + assert(!nodesmask.get(node1)); + assert(!nodesmask.get(partition1[i])); + nodesmask.set(node1); + nodesmask.set(partition1[i]); + } + + ndbout_c("done"); + + int dump[255]; + dump[0] = DumpStateOrd::NdbcntrStopNodes; + memcpy(dump + 1, partition0, sizeof(int)*cnt/2); + + Uint32 master = restarter.getMasterNodeId(); + + if (restarter.dumpStateOneNode(master, dump, 1+cnt/2)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(partition0, cnt/2)) + return NDBT_FAILED; + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + + if (restarter.dumpStateAllNodes(val2, 2)) + return NDBT_FAILED; + + if (restarter.insertErrorInAllNodes(932)) + return NDBT_FAILED; + + dump[0] = 9000; + memcpy(dump + 1, partition0, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + dump[0] = 9000; + memcpy(dump + 1, partition1, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + if (restarter.startNodes(partition0, cnt/2)) + return NDBT_FAILED; + + if (restarter.waitNodesStartPhase(partition0, cnt/2, 2)) + return NDBT_FAILED; + + dump[0] = 9001; + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateAllNodes(dump, 2)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(partition0, cnt/2)) + return NDBT_FAILED; + + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.restartOneDbNode(partition0[i], true, true, true)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(partition0, cnt/2)) + return NDBT_FAILED; + + if (restarter.startAll()) + return NDBT_FAILED; + + if (restarter.waitClusterStarted()) + return NDBT_FAILED; + } + return NDBT_OK; +} + +int +runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){ + + // Assume two replicas + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Uint32 cnt = restarter.getNumDbNodes(); + + for(int loop = 0; loop < ctx->getNumLoops(); loop++) + { + int partition0[256]; + int partition1[256]; + bzero(partition0, sizeof(partition0)); + bzero(partition1, sizeof(partition1)); + Bitmask<4> nodesmask; + + Uint32 node1 = restarter.getDbNodeId(rand()%cnt); + for (Uint32 i = 0; i<cnt/2; i++) + { + do { + int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand()); + if (tmp == -1) + break; + node1 = tmp; + } while(nodesmask.get(node1)); + + partition0[i] = node1; + partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + ndbout_c("nodes %d %d", node1, partition1[i]); + + assert(!nodesmask.get(node1)); + assert(!nodesmask.get(partition1[i])); + nodesmask.set(node1); + nodesmask.set(partition1[i]); + } + + ndbout_c("done"); + + if (restarter.restartAll(false, true, false)) + return NDBT_FAILED; + + int dump[255]; + dump[0] = 9000; + memcpy(dump + 1, partition0, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + dump[0] = 9000; + memcpy(dump + 1, partition1, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + + if (restarter.dumpStateAllNodes(val2, 2)) + return NDBT_FAILED; + + if (restarter.insertErrorInAllNodes(932)) + return NDBT_FAILED; + + if (restarter.startAll()) + return NDBT_FAILED; + + if (restarter.waitClusterStartPhase(2)) + return NDBT_FAILED; + + dump[0] = 9001; + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateAllNodes(dump, 2)) + return NDBT_FAILED; + + if (restarter.waitClusterNoStart(30)) + if (restarter.waitNodesNoStart(partition0, cnt/2, 10)) + if (restarter.waitNodesNoStart(partition1, cnt/2, 10)) + return NDBT_FAILED; + + if (restarter.startAll()) + return NDBT_FAILED; + + if (restarter.waitClusterStarted()) + return NDBT_FAILED; + } + return NDBT_OK; +} + + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ @@ -963,6 +1163,18 @@ TESTCASE("Bug18414", STEP(runBug18414); FINALIZER(runClearTable); } +TESTCASE("Bug18612", + "Test bug with partitioned clusters"){ + INITIALIZER(runLoadTable); + STEP(runBug18612); + FINALIZER(runClearTable); +} +TESTCASE("Bug18612SR", + "Test bug with partitioned clusters"){ + INITIALIZER(runLoadTable); + STEP(runBug18612SR); + FINALIZER(runClearTable); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt index 5d5e70f7184..08467a652f0 100644 --- a/storage/ndb/test/run-test/daily-basic-tests.txt +++ b/storage/ndb/test/run-test/daily-basic-tests.txt @@ -453,10 +453,18 @@ args: -n Bug16772 T1 #cmd: testSystemRestart #args: -n Bug18385 T1 # -max-time: 500 +max-time: 1000 cmd: testNodeRestart args: -n Bug18414 T1 +max-time: 1000 +cmd: testNodeRestart +args: -n Bug18612 T1 + +max-time: 1000 +cmd: testNodeRestart +args: -n Bug18612SR T1 + # # DICT TESTS max-time: 1500 diff --git a/storage/ndb/test/src/NdbRestarts.cpp b/storage/ndb/test/src/NdbRestarts.cpp index eea4af437c4..8465caaab48 100644 --- a/storage/ndb/test/src/NdbRestarts.cpp +++ b/storage/ndb/test/src/NdbRestarts.cpp @@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter, << ") secs " << endl; NdbSleep_SecSleep(seconds); - randomId = (rand() % _restarter.getNumDbNodes()); - nodeId = _restarter.getDbNodeId(randomId); + nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand()); g_info << _restart->m_name << ": node = "<< nodeId << endl; CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0, |