diff options
author | unknown <pekka@clam.ndb.mysql.com> | 2006-06-15 14:49:48 +0200 |
---|---|---|
committer | unknown <pekka@clam.ndb.mysql.com> | 2006-06-15 14:49:48 +0200 |
commit | 2dc4e1709852cace46d33e59329de245d6be937f (patch) | |
tree | 299312716c237e7666afa5ff66656c20466a0f87 /storage | |
parent | 1b3ffa03f223ec12dba9db63c73be29c1d2b2765 (diff) | |
parent | a182963cc04b7ff0ca27abe33a075ac06e4e0c2e (diff) | |
download | mariadb-git-2dc4e1709852cace46d33e59329de245d6be937f.tar.gz |
Merge clam.ndb.mysql.com:/space/pekka/ndb/version/my50-bug18781
into clam.ndb.mysql.com:/space/pekka/ndb/version/my51-bug18781
storage/ndb/include/ndb_version.h.in:
manual merge
storage/ndb/include/kernel/GlobalSignalNumbers.h:
manual merge
storage/ndb/include/kernel/signaldata/AlterTable.hpp:
SCCS merged
storage/ndb/include/kernel/signaldata/CreateTable.hpp:
SCCS merged
storage/ndb/include/kernel/signaldata/DropTable.hpp:
SCCS merged
storage/ndb/src/common/debugger/SignalLoggerManager.cpp:
SCCS merged
storage/ndb/src/common/debugger/signaldata/SignalNames.cpp:
manual merge
storage/ndb/src/kernel/main.cpp:
SCCS merged
storage/ndb/src/kernel/blocks/ERROR_codes.txt:
SCCS merged
storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp:
manual merge
storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp:
manual merge
storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
manual merge
storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp:
SCCS merged
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
manual merge
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
SCCS merged
storage/ndb/src/kernel/vm/DLFifoList.hpp:
use local
storage/ndb/src/kernel/vm/pc.hpp:
SCCS merged
storage/ndb/src/ndbapi/ndberror.c:
use local - add 711 later
storage/ndb/test/ndbapi/testDict.cpp:
SCCS merged
Diffstat (limited to 'storage')
-rw-r--r-- | storage/ndb/include/kernel/GlobalSignalNumbers.h | 10 | ||||
-rw-r--r-- | storage/ndb/include/kernel/signaldata/AlterTable.hpp | 1 | ||||
-rw-r--r-- | storage/ndb/include/kernel/signaldata/CreateTable.hpp | 1 | ||||
-rw-r--r-- | storage/ndb/include/kernel/signaldata/DropTable.hpp | 1 | ||||
-rw-r--r-- | storage/ndb/include/ndb_version.h.in | 3 | ||||
-rw-r--r-- | storage/ndb/src/common/debugger/SignalLoggerManager.cpp | 2 | ||||
-rw-r--r-- | storage/ndb/src/common/debugger/signaldata/SignalNames.cpp | 7 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/ERROR_codes.txt | 6 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp | 314 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp | 97 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 28 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp | 6 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 177 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 4 | ||||
-rw-r--r-- | storage/ndb/src/kernel/main.cpp | 4 | ||||
-rw-r--r-- | storage/ndb/src/kernel/vm/pc.hpp | 2 | ||||
-rw-r--r-- | storage/ndb/test/ndbapi/testDict.cpp | 321 |
17 files changed, 968 insertions, 16 deletions
diff --git a/storage/ndb/include/kernel/GlobalSignalNumbers.h b/storage/ndb/include/kernel/GlobalSignalNumbers.h index f2808e9b15c..950b2629d9e 100644 --- a/storage/ndb/include/kernel/GlobalSignalNumbers.h +++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h @@ -517,16 +517,12 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; #define GSN_TEST_ORD 407 #define GSN_TESTSIG 408 #define GSN_TIME_SIGNAL 409 -/* 410 unused */ -/* 411 unused */ -/* 412 unused */ #define GSN_TUP_ABORTREQ 414 #define GSN_TUP_ADD_ATTCONF 415 #define GSN_TUP_ADD_ATTRREF 416 #define GSN_TUP_ADD_ATTRREQ 417 #define GSN_TUP_ATTRINFO 418 #define GSN_TUP_COMMITREQ 419 -/* 420 unused */ /* 421 unused */ /* 422 unused */ @@ -981,4 +977,10 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; #define GSN_DICT_ABORT_REF 668 #define GSN_DICT_ABORT_CONF 669 +/* DICT LOCK signals */ +#define GSN_DICT_LOCK_REQ 410 +#define GSN_DICT_LOCK_CONF 411 +#define GSN_DICT_LOCK_REF 412 +#define GSN_DICT_UNLOCK_ORD 420 + #endif diff --git a/storage/ndb/include/kernel/signaldata/AlterTable.hpp b/storage/ndb/include/kernel/signaldata/AlterTable.hpp index 1cdc7c00fcb..afd90541c9e 100644 --- a/storage/ndb/include/kernel/signaldata/AlterTable.hpp +++ b/storage/ndb/include/kernel/signaldata/AlterTable.hpp @@ -196,6 +196,7 @@ public: InvalidTableVersion = 241, DropInProgress = 283, Busy = 701, + BusyWithNR = 711, NotMaster = 702, InvalidFormat = 703, AttributeNameTooLong = 704, diff --git a/storage/ndb/include/kernel/signaldata/CreateTable.hpp b/storage/ndb/include/kernel/signaldata/CreateTable.hpp index b43a5e76eaf..e5e78bddd49 100644 --- a/storage/ndb/include/kernel/signaldata/CreateTable.hpp +++ b/storage/ndb/include/kernel/signaldata/CreateTable.hpp @@ -77,6 +77,7 @@ public: enum ErrorCode { NoError = 0, Busy = 701, + BusyWithNR = 711, NotMaster = 702, InvalidFormat = 703, AttributeNameTooLong = 704, diff --git a/storage/ndb/include/kernel/signaldata/DropTable.hpp b/storage/ndb/include/kernel/signaldata/DropTable.hpp index cae6aff8754..e762446d2b8 100644 --- a/storage/ndb/include/kernel/signaldata/DropTable.hpp +++ b/storage/ndb/include/kernel/signaldata/DropTable.hpp @@ -53,6 +53,7 @@ public: enum ErrorCode { Busy = 701, + BusyWithNR = 711, NotMaster = 702, NoSuchTable = 709, InvalidTableVersion = 241, diff --git a/storage/ndb/include/ndb_version.h.in b/storage/ndb/include/ndb_version.h.in index 5b67796a019..ef35fa4093b 100644 --- a/storage/ndb/include/ndb_version.h.in +++ b/storage/ndb/include/ndb_version.h.in @@ -64,5 +64,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; #define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) #define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) #define NDBD_FRAGID_VERSION (MAKE_VERSION(5,1,6)) +#define NDBD_DICT_LOCK_VERSION_5 MAKE_VERSION(5,0,23) +#define NDBD_DICT_LOCK_VERSION_5_1 MAKE_VERSION(5,1,12) + #endif diff --git a/storage/ndb/src/common/debugger/SignalLoggerManager.cpp b/storage/ndb/src/common/debugger/SignalLoggerManager.cpp index d8710d2058f..67e13dc805a 100644 --- a/storage/ndb/src/common/debugger/SignalLoggerManager.cpp +++ b/storage/ndb/src/common/debugger/SignalLoggerManager.cpp @@ -139,7 +139,7 @@ SignalLoggerManager::log(LogMode logMode, const char * params) } else { for (int i = 0; i < count; ++i){ BlockNumber number = getBlockNo(blocks[i]); - cnt += log(SLM_ON, number-MIN_BLOCK_NO, logMode); + cnt += log(SLM_ON, number, logMode); } } for(int i = 0; i<count; i++){ diff --git a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp index 2639b2da7be..7304a46a278 100644 --- a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp +++ b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp @@ -624,5 +624,12 @@ const GsnName SignalNames [] = { ,{ GSN_LCP_PREPARE_REQ, "LCP_PREPARE_REQ" } ,{ GSN_LCP_PREPARE_REF, "LCP_PREPARE_REF" } ,{ GSN_LCP_PREPARE_CONF, "LCP_PREPARE_CONF" } + + /* DICT LOCK */ + ,{ GSN_DICT_LOCK_REQ, "DICT_LOCK_REQ" } + ,{ GSN_DICT_LOCK_CONF, "DICT_LOCK_CONF" } + ,{ GSN_DICT_LOCK_REF, "DICT_LOCK_REF" } + ,{ GSN_DICT_UNLOCK_ORD, "DICT_UNLOCK_ORD" } + }; const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName); diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt index d69faa90f72..867dc0d2efc 100644 --- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4013 Next DBLQH 5043 Next DBDICT 6007 -Next DBDIH 7174 +Next DBDIH 7177 Next DBTC 8037 Next CMVMI 9000 Next BACKUP 10022 @@ -312,6 +312,10 @@ Test Crashes in handling node restarts 7170: Crash when receiving START_PERMREF (InitialStartRequired) +7174: Crash starting node before sending DICT_LOCK_REQ +7175: Master sends one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR) +7176: Slave NR pretends master does not support DICT lock (rolling upgrade) + DICT: 6000 Crash during NR when receiving DICTSTARTREQ 6001 Crash during NR when receiving SCHEMA_INFO diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index ef08c06822f..c346bdee695 100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -322,6 +322,11 @@ void Dbdict::execCONTINUEB(Signal* signal) sendGetTabResponse(signal); break; + case ZDICT_LOCK_POLL: + jam(); + checkDictLockQueue(signal, true); + break; + default : ndbrequire(false); break; @@ -1500,8 +1505,9 @@ Dbdict::Dbdict(Block_context& ctx): c_Trans(c_opRecordPool), c_opCreateObj(c_schemaOp), c_opDropObj(c_schemaOp), - c_opRecordSequence(0) - + c_opRecordSequence(0), + c_dictLockQueue(c_dictLockPool), + c_dictLockPoll(false) { BLOCK_CONSTRUCTOR(Dbdict); @@ -1670,6 +1676,8 @@ Dbdict::Dbdict(Block_context& ctx): addRecSignal(GSN_DICT_ABORT_REF, &Dbdict::execDICT_ABORT_REF); addRecSignal(GSN_DICT_ABORT_CONF, &Dbdict::execDICT_ABORT_CONF); + addRecSignal(GSN_DICT_LOCK_REQ, &Dbdict::execDICT_LOCK_REQ); + addRecSignal(GSN_DICT_UNLOCK_ORD, &Dbdict::execDICT_UNLOCK_ORD); }//Dbdict::Dbdict() Dbdict::~Dbdict() @@ -2061,6 +2069,8 @@ void Dbdict::execREAD_CONFIG_REQ(Signal* signal) c_opCreateTrigger.setSize(8); c_opDropTrigger.setSize(8); c_opAlterTrigger.setSize(8); + + c_dictLockPool.setSize(32); // Initialize schema file copies c_schemaFile[0].schemaPage = @@ -3535,6 +3545,10 @@ void Dbdict::execNODE_FAILREP(Signal* signal) c_blockState = BS_NODE_FAILURE; ok = true; break; + case BS_NODE_RESTART: + jam(); + ok = true; + break; } ndbrequire(ok); @@ -3557,6 +3571,15 @@ void Dbdict::execNODE_FAILREP(Signal* signal) }//if }//for + /* + * NODE_FAILREP guarantees that no "in flight" signal from + * a dead node is accepted, and also that the job buffer contains + * no such (un-executed) signals. Therefore no DICT_UNLOCK_ORD + * from a dead node (leading to master crash) is possible after + * this clean-up removes the lock record. + */ + removeStaleDictLocks(signal, theFailedNodes); + }//execNODE_FAILREP() @@ -3625,6 +3648,12 @@ Dbdict::execCREATE_TABLE_REQ(Signal* signal){ break; } + if (c_blockState == BS_NODE_RESTART){ + jam(); + parseRecord.errorCode = CreateTableRef::BusyWithNR; + break; + } + if (c_blockState != BS_IDLE){ jam(); parseRecord.errorCode = CreateTableRef::Busy; @@ -3804,6 +3833,12 @@ Dbdict::execALTER_TABLE_REQ(Signal* signal) return; } + if(c_blockState == BS_NODE_RESTART){ + jam(); + alterTableRef(signal, req, AlterTableRef::BusyWithNR); + return; + } + if(c_blockState != BS_IDLE){ jam(); alterTableRef(signal, req, AlterTableRef::Busy); @@ -6357,6 +6392,12 @@ Dbdict::execDROP_TABLE_REQ(Signal* signal){ return; } + if(c_blockState == BS_NODE_RESTART){ + jam(); + dropTableRef(signal, req, DropTableRef::BusyWithNR); + return; + } + if(c_blockState != BS_IDLE){ jam(); dropTableRef(signal, req, DropTableRef::Busy); @@ -13279,6 +13320,275 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask) } } +// DICT lock master + +const Dbdict::DictLockType* +Dbdict::getDictLockType(Uint32 lockType) +{ + static const DictLockType lt[] = { + { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" } + }; + for (int i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) { + if (lt[i].lockType == lockType) + return <[i]; + } + return NULL; +} + +void +Dbdict::sendDictLockInfoEvent(Uint32 pollCount) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + unsigned count = 0; + + char queue_buf[100]; + char *p = &queue_buf[0]; + const char *const q = &queue_buf[sizeof(queue_buf)]; + *p = 0; + + while (loopPtr.i != RNIL) { + jam(); + my_snprintf(p, q-p, "%s%u%s", + ++count == 1 ? "" : " ", + (unsigned)refToNode(loopPtr.p->req.userRef), + loopPtr.p->locked ? "L" : ""); + p += strlen(p); + c_dictLockQueue.next(loopPtr); + } + + infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s", + (int)c_blockState, + c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(), + c_dictLockPoll, (int)pollCount, queue_buf); +} + +void +Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text) +{ + infoEvent("DICT: %s %u for %s", + text, + (unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text); +} + +void +Dbdict::execDICT_LOCK_REQ(Signal* signal) +{ + jamEntry(); + const DictLockReq* req = (const DictLockReq*)&signal->theData[0]; + + // make sure bad request crashes slave, not master (us) + + if (getOwnNodeId() != c_masterNodeId) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::NotMaster); + return; + } + + const DictLockType* lt = getDictLockType(req->lockType); + if (lt == NULL) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::InvalidLockType); + return; + } + + if (req->userRef != signal->getSendersBlockRef() || + getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::BadUserRef); + return; + } + + if (c_aliveNodes.get(refToNode(req->userRef))) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooLate); + return; + } + + DictLockPtr lockPtr; + if (! c_dictLockQueue.seize(lockPtr)) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooManyRequests); + return; + } + + lockPtr.p->req = *req; + lockPtr.p->locked = false; + lockPtr.p->lt = lt; + + checkDictLockQueue(signal, false); + + if (! lockPtr.p->locked) + sendDictLockInfoEvent(lockPtr, "lock request by node"); +} + +void +Dbdict::checkDictLockQueue(Signal* signal, bool poll) +{ + Uint32 pollCount = ! poll ? 0 : signal->theData[1]; + + DictLockPtr lockPtr; + + do { + if (! c_dictLockQueue.first(lockPtr)) { + jam(); + setDictLockPoll(signal, false, pollCount); + return; + } + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + break; + } + + if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) { + jam(); + break; + } + + ndbrequire(c_blockState == BS_IDLE); + lockPtr.p->locked = true; + c_blockState = lockPtr.p->lt->blockState; + sendDictLockConf(signal, lockPtr); + + sendDictLockInfoEvent(lockPtr, "locked by node"); + } while (0); + + // poll while first request is open + // this routine is called again when it is removed for any reason + + bool on = ! lockPtr.p->locked; + setDictLockPoll(signal, on, pollCount); +} + +void +Dbdict::execDICT_UNLOCK_ORD(Signal* signal) +{ + jamEntry(); + const DictUnlockOrd* ord = (const DictUnlockOrd*)&signal->theData[0]; + + DictLockPtr lockPtr; + c_dictLockQueue.getPtr(lockPtr, ord->lockPtr); + ndbrequire(lockPtr.p->lt->lockType == ord->lockType); + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + sendDictLockInfoEvent(lockPtr, "unlocked by node"); + } else { + sendDictLockInfoEvent(lockPtr, "lock request removed by node"); + } + + c_dictLockQueue.release(lockPtr); + + checkDictLockQueue(signal, false); +} + +void +Dbdict::sendDictLockConf(Signal* signal, DictLockPtr lockPtr) +{ + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + const DictLockReq& req = lockPtr.p->req; + + conf->userPtr = req.userPtr; + conf->lockType = req.lockType; + conf->lockPtr = lockPtr.i; + + sendSignal(req.userRef, GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); +} + +void +Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode) +{ + DictLockRef* ref = (DictLockRef*)&signal->theData[0]; + + ref->userPtr = req.userPtr; + ref->lockType = req.lockType; + ref->errorCode = errorCode; + + sendSignal(req.userRef, GSN_DICT_LOCK_REF, signal, + DictLockRef::SignalLength, JBB); +} + +// control polling + +void +Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount) +{ + if (on) { + jam(); + signal->theData[0] = ZDICT_LOCK_POLL; + signal->theData[1] = pollCount + 1; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2); + } + + bool change = (c_dictLockPoll != on); + + if (change) { + jam(); + c_dictLockPoll = on; + } + + // avoid too many messages if master is stuck busy (BS_NODE_FAILURE) + bool periodic = + pollCount < 8 || + pollCount < 64 && pollCount % 8 == 0 || + pollCount < 512 && pollCount % 64 == 0 || + pollCount < 4096 && pollCount % 512 == 0 || + pollCount % 4096 == 0; // about every 6 minutes + + if (change || periodic) + sendDictLockInfoEvent(pollCount); +} + +// NF handling + +void +Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + + if (getOwnNodeId() != c_masterNodeId) { + ndbrequire(loopPtr.i == RNIL); + return; + } + + while (loopPtr.i != RNIL) { + jam(); + DictLockPtr lockPtr = loopPtr; + c_dictLockQueue.next(loopPtr); + + Uint32 nodeId = refToNode(lockPtr.p->req.userRef); + + if (NodeBitmask::get(theFailedNodes, nodeId)) { + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + + sendDictLockInfoEvent(lockPtr, "remove lock by failed node"); + } else { + sendDictLockInfoEvent(lockPtr, "remove lock request by failed node"); + } + + c_dictLockQueue.release(lockPtr); + } + } + + checkDictLockQueue(signal, false); +} + + /* **************************************************************** */ /* ---------------------------------------------------------------- */ /* MODULE: STORE/RESTORE SCHEMA FILE---------------------- */ diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index c770b6c69b8..f3243683d76 100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -51,6 +51,7 @@ #include <signaldata/CreateTrig.hpp> #include <signaldata/DropTrig.hpp> #include <signaldata/AlterTrig.hpp> +#include <signaldata/DictLock.hpp> #include "SchemaFile.hpp" #include <blocks/mutexes.hpp> #include <SafeCounter.hpp> @@ -68,6 +69,7 @@ /*--------------------------------------------------------------*/ #define ZPACK_TABLE_INTO_PAGES 0 #define ZSEND_GET_TAB_RESPONSE 3 +#define ZDICT_LOCK_POLL 4 /*--------------------------------------------------------------*/ @@ -812,6 +814,9 @@ private: void execDROP_FILEGROUP_REF(Signal* signal); void execDROP_FILEGROUP_CONF(Signal* signal); + void execDICT_LOCK_REQ(Signal* signal); + void execDICT_UNLOCK_ORD(Signal* signal); + /* * 2.4 COMMON STORED VARIABLES */ @@ -1046,12 +1051,43 @@ private: // State variables /* ----------------------------------------------------------------------- */ +#ifndef ndb_dbdict_log_block_state enum BlockState { BS_IDLE = 0, BS_CREATE_TAB = 1, BS_BUSY = 2, - BS_NODE_FAILURE = 3 + BS_NODE_FAILURE = 3, + BS_NODE_RESTART = 4 + }; +#else // quick hack to log changes + enum { + BS_IDLE = 0, + BS_CREATE_TAB = 1, + BS_BUSY = 2, + BS_NODE_FAILURE = 3, + BS_NODE_RESTART = 4 }; + struct BlockState; + friend struct BlockState; + struct BlockState { + BlockState() : + m_value(BS_IDLE) { + } + BlockState(int value) : + m_value(value) { + } + operator int() const { + return m_value; + } + BlockState& operator=(const BlockState& bs) { + Dbdict* dict = (Dbdict*)globalData.getBlock(DBDICT); + dict->infoEvent("DICT: bs %d->%d", m_value, bs.m_value); + m_value = bs.m_value; + return *this; + } + int m_value; + }; +#endif BlockState c_blockState; struct PackTable { @@ -2015,6 +2051,65 @@ private: // Unique key for operation XXX move to some system table Uint32 c_opRecordSequence; + /* + * Master DICT can be locked in 2 mutually exclusive ways: + * + * 1) for schema ops, via operation records + * 2) against schema ops, via a lock queue + * + * Current use of 2) is by a starting node, to prevent schema ops + * until started. The ops are refused (BlockState != BS_IDLE), + * not queued. + * + * Master failure is not handled, in node start case the starting + * node will crash too anyway. Use lock table in future.. + * + * The lock queue is "serial" but other behaviour is possible + * by checking lock types e.g. to allow parallel node starts. + * + * Checking release of last op record is not convenient with + * current structure (5.0). Instead we poll via continueB. + * + * XXX only table ops check BlockState + */ + + struct DictLockType { + DictLockReq::LockType lockType; + BlockState blockState; + const char* text; + }; + + struct DictLockRecord { + DictLockReq req; + const DictLockType* lt; + bool locked; + union { + Uint32 nextPool; + Uint32 nextList; + }; + Uint32 prevList; + }; + + typedef Ptr<DictLockRecord> DictLockPtr; + ArrayPool<DictLockRecord> c_dictLockPool; + DLFifoList<DictLockRecord> c_dictLockQueue; + bool c_dictLockPoll; + + static const DictLockType* getDictLockType(Uint32 lockType); + void sendDictLockInfoEvent(Uint32 pollCount); + void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text); + + void checkDictLockQueue(Signal* signal, bool poll); + void sendDictLockConf(Signal* signal, DictLockPtr lockPtr); + void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode); + + // control polling i.e. continueB loop + void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount); + + // NF handling + void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes); + + // Statement blocks /* ------------------------------------------------------------ */ diff --git a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index f98df82ea7d..a8dad87d81c 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -718,6 +718,9 @@ private: void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr); void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId); + void execDICT_LOCK_CONF(Signal* signal); + void execDICT_LOCK_REF(Signal* signal); + // Statement blocks //------------------------------------ // Methods that send signals @@ -935,6 +938,7 @@ private: void initialStartCompletedLab(Signal *); void allNodesLcpCompletedLab(Signal *); void nodeRestartPh2Lab(Signal *); + void nodeRestartPh2Lab2(Signal *); void initGciFilesLab(Signal *); void dictStartConfLab(Signal *); void nodeDictStartConfLab(Signal *); @@ -1603,6 +1607,30 @@ private: void startInfoReply(Signal *, Uint32 nodeId); void dump_replica_info(); + + /* + * Lock master DICT. Only current use is by starting node + * during NR. A pool of slave records is convenient anyway. + */ + struct DictLockSlaveRecord { + Uint32 lockPtr; + Uint32 lockType; + bool locked; + Callback callback; + Uint32 nextPool; + }; + + typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr; + ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool; + + // slave + void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c); + void recvDictLockConf(Signal* signal); + void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI); + + // NR + Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR + void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret); }; #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32) diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index a6ec3749606..ec4ac3c812a 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -53,6 +53,9 @@ void Dbdih::initData() waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE); waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE); + c_dictLockSlavePool.setSize(1); // assert single usage + c_dictLockSlavePtrI_nodeRestart = RNIL; + cgcpOrderBlocked = 0; c_lcpState.ctcCounter = 0; cwaitLcpSr = false; @@ -251,6 +254,9 @@ Dbdih::Dbdih(Block_context& ctx): addRecSignal(GSN_CREATE_FRAGMENTATION_REQ, &Dbdih::execCREATE_FRAGMENTATION_REQ); + addRecSignal(GSN_DICT_LOCK_CONF, &Dbdih::execDICT_LOCK_CONF); + addRecSignal(GSN_DICT_LOCK_REF, &Dbdih::execDICT_LOCK_REF); + apiConnectRecord = 0; connectRecord = 0; fileRecord = 0; diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index bb4c2ed197e..4d9a28f32d4 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -68,6 +68,7 @@ #include <signaldata/LqhFrag.hpp> #include <signaldata/FsOpenReq.hpp> #include <signaldata/DihFragCount.hpp> +#include <signaldata/DictLock.hpp> #include <DebuggerNames.hpp> #include <EventLogger.hpp> @@ -545,7 +546,7 @@ void Dbdih::execCONTINUEB(Signal* signal) break; case DihContinueB::ZSTART_PERMREQ_AGAIN: jam(); - nodeRestartPh2Lab(signal); + nodeRestartPh2Lab2(signal); return; break; case DihContinueB::SwitchReplica: @@ -1295,6 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) case NodeState::ST_INITIAL_NODE_RESTART: case NodeState::ST_NODE_RESTART: jam(); + /*********************************************************************** * When starting nodes while system is operational we must be controlled * by the master since only one node restart is allowed at a time. @@ -1305,7 +1307,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) req->startingRef = reference(); req->startingVersion = 0; // Obsolete sendSignal(cmasterdihref, GSN_START_MEREQ, signal, - StartMeReq::SignalLength, JBB); + StartMeReq::SignalLength, JBB); return; } ndbrequire(false); @@ -1365,6 +1367,24 @@ void Dbdih::execNDB_STTOR(Signal* signal) } ndbrequire(false); break; + case ZNDB_SPH7: + jam(); + switch (typestart) { + case NodeState::ST_INITIAL_START: + case NodeState::ST_SYSTEM_RESTART: + jam(); + ndbsttorry10Lab(signal, __LINE__); + return; + case NodeState::ST_NODE_RESTART: + case NodeState::ST_INITIAL_NODE_RESTART: + jam(); + sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart); + c_dictLockSlavePtrI_nodeRestart = RNIL; + ndbsttorry10Lab(signal, __LINE__); + return; + } + ndbrequire(false); + break; default: jam(); ndbsttorry10Lab(signal, __LINE__); @@ -1575,6 +1595,34 @@ void Dbdih::execREAD_NODESCONF(Signal* signal) /*---------------------------------------------------------------------------*/ void Dbdih::nodeRestartPh2Lab(Signal* signal) { + /* + * Lock master DICT to avoid metadata operations during INR/NR. + * Done just before START_PERMREQ. + * + * It would be more elegant to do this just before START_MEREQ. + * The problem is, on INR we end up in massive invalidateNodeLCP + * which is not fully protected against metadata ops. + */ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + + // check that we are not yet taking part in schema ops + CRASH_INSERTION(7174); + + Uint32 lockType = DictLockReq::NodeRestartLock; + Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 }; + sendDictLockReq(signal, lockType, c); +} + +void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret) +{ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + c_dictLockSlavePtrI_nodeRestart = data; + + nodeRestartPh2Lab2(signal); +} + +void Dbdih::nodeRestartPh2Lab2(Signal* signal) +{ /*------------------------------------------------------------------------*/ // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY // RUNNING SYSTEM. @@ -1585,7 +1633,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal) req->nodeId = cownNodeId; req->startType = cstarttype; sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB); -}//Dbdih::nodeRestartPh2Lab() +} void Dbdih::execSTART_PERMCONF(Signal* signal) { @@ -1710,12 +1758,12 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) const BlockReference retRef = req->blockRef; const Uint32 nodeId = req->nodeId; const Uint32 typeStart = req->startType; - CRASH_INSERTION(7122); ndbrequire(isMaster()); ndbrequire(refToNode(retRef) == nodeId); if ((c_nodeStartMaster.activeState) || - (c_nodeStartMaster.wait != ZFALSE)) { + (c_nodeStartMaster.wait != ZFALSE) || + ERROR_INSERTED_CLEAR(7175)) { jam(); signal->theData[0] = nodeId; signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; @@ -10780,6 +10828,10 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) c_copyGCIMaster.m_copyReason, c_copyGCIMaster.m_waiting); break; + case GCP_READY: // shut up lint + case GCP_PREPARE_SENT: + case GCP_COMMIT_SENT: + break; } ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", @@ -14978,3 +15030,118 @@ Dbdih::NodeRecord::NodeRecord(){ copyCompleted = false; allowNodeStart = true; } + +// DICT lock slave + +void +Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c) +{ + DictLockReq* req = (DictLockReq*)&signal->theData[0]; + DictLockSlavePtr lockPtr; + + c_dictLockSlavePool.seize(lockPtr); + ndbrequire(lockPtr.i != RNIL); + + req->userPtr = lockPtr.i; + req->lockType = lockType; + req->userRef = reference(); + + lockPtr.p->lockPtr = RNIL; + lockPtr.p->lockType = lockType; + lockPtr.p->locked = false; + lockPtr.p->callback = c; + + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + unsigned int get_minor = getMinor(masterVersion); + unsigned int get_build = getBuild(masterVersion); + + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + jam(); + + infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u", + (unsigned int)cmasterNodeId, get_major, get_minor, get_build); + + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + conf->userPtr = lockPtr.i; + conf->lockType = lockType; + conf->lockPtr = ZNIL; + + sendSignal(reference(), GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); + return; + } + } + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal, + DictLockReq::SignalLength, JBB); +} + +void +Dbdih::execDICT_LOCK_CONF(Signal* signal) +{ + jamEntry(); + recvDictLockConf(signal); +} + +void +Dbdih::execDICT_LOCK_REF(Signal* signal) +{ + jamEntry(); + ndbrequire(false); +} + +void +Dbdih::recvDictLockConf(Signal* signal) +{ + const DictLockConf* conf = (const DictLockConf*)&signal->theData[0]; + + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr); + + lockPtr.p->lockPtr = conf->lockPtr; + ndbrequire(lockPtr.p->lockType == conf->lockType); + ndbrequire(lockPtr.p->locked == false); + lockPtr.p->locked = true; + + lockPtr.p->callback.m_callbackData = lockPtr.i; + execute(signal, lockPtr.p->callback, 0); +} + +void +Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI) +{ + DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0]; + + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI); + + ord->lockPtr = lockPtr.p->lockPtr; + ord->lockType = lockPtr.p->lockType; + + c_dictLockSlavePool.release(lockPtr); + + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + return; + } + } + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, + DictUnlockOrd::SignalLength, JBB); +} diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index ebc40cb385d..1eac369ec65 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -2477,7 +2477,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) { jam(); CRASH_INSERTION(932); - BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } @@ -2500,7 +2500,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ndbrequire(false); case ZAPI_INACTIVE: { - BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } diff --git a/storage/ndb/src/kernel/main.cpp b/storage/ndb/src/kernel/main.cpp index b380a4e74c3..2eb6cf4d869 100644 --- a/storage/ndb/src/kernel/main.cpp +++ b/storage/ndb/src/kernel/main.cpp @@ -420,6 +420,10 @@ int main(int argc, char** argv) FILE * signalLog = fopen(buf, "a"); globalSignalLoggers.setOwnNodeId(globalData.ownId); globalSignalLoggers.setOutputStream(signalLog); +#if 0 // to log startup + globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH"); + globalData.testOn = 1; +#endif #endif catchsigs(false); diff --git a/storage/ndb/src/kernel/vm/pc.hpp b/storage/ndb/src/kernel/vm/pc.hpp index e6efad65779..6b9b563aa27 100644 --- a/storage/ndb/src/kernel/vm/pc.hpp +++ b/storage/ndb/src/kernel/vm/pc.hpp @@ -125,11 +125,13 @@ #ifdef ERROR_INSERT #define ERROR_INSERT_VARIABLE UintR cerrorInsert #define ERROR_INSERTED(x) (cerrorInsert == (x)) +#define ERROR_INSERTED_CLEAR(x) (cerrorInsert == (x) ? (cerrorInsert = 0, true) : false) #define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x #define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0 #else #define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used #define ERROR_INSERTED(x) false +#define ERROR_INSERTED_CLEAR(x) false #define SET_ERROR_INSERT_VALUE(x) #define CLEAR_ERROR_INSERT_VALUE #endif diff --git a/storage/ndb/test/ndbapi/testDict.cpp b/storage/ndb/test/ndbapi/testDict.cpp index b015e16bae1..6eb2ac3fba7 100644 --- a/storage/ndb/test/ndbapi/testDict.cpp +++ b/storage/ndb/test/ndbapi/testDict.cpp @@ -1644,6 +1644,299 @@ end: return result; } +// NFNR + +// Restarter controls dict ops : 1-run 2-pause 3-stop +// synced by polling... + +static bool +send_dict_ops_cmd(NDBT_Context* ctx, Uint32 cmd) +{ + ctx->setProperty("DictOps_CMD", cmd); + while (1) { + if (ctx->isTestStopped()) + return false; + if (ctx->getProperty("DictOps_ACK") == cmd) + break; + NdbSleep_MilliSleep(100); + } + return true; +} + +static bool +recv_dict_ops_run(NDBT_Context* ctx) +{ + while (1) { + if (ctx->isTestStopped()) + return false; + Uint32 cmd = ctx->getProperty("DictOps_CMD"); + ctx->setProperty("DictOps_ACK", cmd); + if (cmd == 1) + break; + if (cmd == 3) + return false; + NdbSleep_MilliSleep(100); + } + return true; +} + +int +runRestarts(NDBT_Context* ctx, NDBT_Step* step) +{ + static int errlst_master[] = { // non-crashing + 7175, // send one fake START_PERMREF + 0 + }; + static int errlst_node[] = { + 7174, // crash before sending DICT_LOCK_REQ + 7176, // pretend master does not support DICT lock + 7121, // crash at receive START_PERMCONF + 0 + }; + const uint errcnt_master = sizeof(errlst_master)/sizeof(errlst_master[0]); + const uint errcnt_node = sizeof(errlst_node)/sizeof(errlst_node[0]); + + myRandom48Init(NdbTick_CurrentMillisecond()); + NdbRestarter restarter; + int result = NDBT_OK; + const int loops = ctx->getNumLoops(); + + for (int l = 0; l < loops && result == NDBT_OK; l++) { + g_info << "1: === loop " << l << " ===" << endl; + + // assuming 2-way replicated + + int numnodes = restarter.getNumDbNodes(); + CHECK(numnodes >= 1); + if (numnodes == 1) + break; + + int masterNodeId = restarter.getMasterNodeId(); + CHECK(masterNodeId != -1); + + // for more complex cases need more restarter support methods + + int nodeIdList[2] = { 0, 0 }; + int nodeIdCnt = 0; + + if (numnodes >= 2) { + int rand = myRandom48(numnodes); + int nodeId = restarter.getRandomNotMasterNodeId(rand); + CHECK(nodeId != -1); + nodeIdList[nodeIdCnt++] = nodeId; + } + + if (numnodes >= 4 && myRandom48(2) == 0) { + int rand = myRandom48(numnodes); + int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand); + CHECK(nodeId != -1); + if (nodeId != masterNodeId) + nodeIdList[nodeIdCnt++] = nodeId; + } + + g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl; + + const uint timeout = 60; //secs for node wait + const unsigned maxsleep = 2000; //ms + + bool NF_ops = ctx->getProperty("Restart_NF_ops"); + uint NF_type = ctx->getProperty("Restart_NF_type"); + bool NR_ops = ctx->getProperty("Restart_NR_ops"); + bool NR_error = ctx->getProperty("Restart_NR_error"); + + g_info << "1: " << (NF_ops ? "run" : "pause") << " dict ops" << endl; + if (! send_dict_ops_cmd(ctx, NF_ops ? 1 : 2)) + break; + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + { + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; + + bool nostart = true; + bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2); + bool initial = myRandom48(2); + + char flags[40]; + strcpy(flags, "flags: nostart"); + if (abort) + strcat(flags, ",abort"); + if (initial) + strcat(flags, ",initial"); + + g_info << "1: restart " << nodeId << " " << flags << endl; + CHECK(restarter.restartOneDbNode(nodeId, initial, nostart, abort) == 0); + } + } + + g_info << "1: wait for nostart" << endl; + CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt, timeout) == 0); + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + int err_master = 0; + int err_node[2] = { 0, 0 }; + + if (NR_error) { + err_master = errlst_master[l % errcnt_master]; + + // limitation: cannot have 2 node restarts and crash_insert + // one node may die for real (NF during startup) + + for (int i = 0; i < nodeIdCnt && nodeIdCnt == 1; i++) { + err_node[i] = errlst_node[l % errcnt_node]; + + // 7176 - no DICT lock protection + + if (err_node[i] == 7176) { + g_info << "1: no dict ops due to error insert " + << err_node[i] << endl; + NR_ops = false; + } + } + } + + g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl; + if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2)) + break; + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: start nodes" << endl; + CHECK(restarter.startNodes(nodeIdList, nodeIdCnt) == 0); + + if (NR_error) { + { + int err = err_master; + if (err != 0) { + g_info << "1: insert master error " << err << endl; + CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0); + } + } + + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; + + int err = err_node[i]; + if (err != 0) { + g_info << "1: insert node " << nodeId << " error " << err << endl; + CHECK(restarter.insertErrorInNode(nodeId, err) == 0); + } + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: wait cluster started" << endl; + CHECK(restarter.waitClusterStarted(timeout) == 0); + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: restart done" << endl; + } + + g_info << "1: stop dict ops" << endl; + send_dict_ops_cmd(ctx, 3); + + return result; +} + +int +runDictOps(NDBT_Context* ctx, NDBT_Step* step) +{ + myRandom48Init(NdbTick_CurrentMillisecond()); + int result = NDBT_OK; + + for (int l = 0; result == NDBT_OK; l++) { + if (! recv_dict_ops_run(ctx)) + break; + + g_info << "2: === loop " << l << " ===" << endl; + + Ndb* pNdb = GETNDB(step); + NdbDictionary::Dictionary* pDic = pNdb->getDictionary(); + const NdbDictionary::Table* pTab = ctx->getTab(); + const char* tabName = pTab->getName(); + + const unsigned long maxsleep = 100; //ms + + g_info << "2: create table" << endl; + { + uint count = 0; + try_create: + count++; + if (pDic->createTable(*pTab) != 0) { + const NdbError err = pDic->getNdbError(); + if (count == 1) + g_err << "2: " << tabName << ": create failed: " << err << endl; + if (err.code != 711) { + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + goto try_create; + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: verify create" << endl; + const NdbDictionary::Table* pTab2 = pDic->getTable(tabName); + if (pTab2 == NULL) { + const NdbError err = pDic->getNdbError(); + g_err << "2: " << tabName << ": verify create: " << err << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + // replace by the Retrieved table + pTab = pTab2; + + int records = myRandom48(ctx->getNumRecords()); + g_info << "2: load " << records << " records" << endl; + HugoTransactions hugoTrans(*pTab); + if (hugoTrans.loadTable(pNdb, records) != 0) { + // XXX get error code from hugo + g_err << "2: " << tabName << ": load failed" << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: drop" << endl; + { + uint count = 0; + try_drop: + count++; + if (pDic->dropTable(tabName) != 0) { + const NdbError err = pDic->getNdbError(); + if (count == 1) + g_err << "2: " << tabName << ": drop failed: " << err << endl; + if (err.code != 711) { + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + goto try_drop; + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: verify drop" << endl; + const NdbDictionary::Table* pTab3 = pDic->getTable(tabName); + if (pTab3 != NULL) { + g_err << "2: " << tabName << ": verify drop: table exists" << endl; + result = NDBT_FAILED; + break; + } + if (pDic->getNdbError().code != 709) { + const NdbError err = pDic->getNdbError(); + g_err << "2: " << tabName << ": verify drop: " << err << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + } + + return result; +} + NDBT_TESTSUITE(testDict); TESTCASE("CreateAndDrop", "Try to create and drop the table loop number of times\n"){ @@ -1757,6 +2050,34 @@ TESTCASE("FailAddFragment", "Fail add fragment or attribute in ACC or TUP or TUX\n"){ INITIALIZER(runFailAddFragment); } +TESTCASE("Restart_NF1", + "DICT ops during node graceful shutdown (not master)"){ + TC_PROPERTY("Restart_NF_ops", 1); + TC_PROPERTY("Restart_NF_type", 1); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NF2", + "DICT ops during node shutdown abort (not master)"){ + TC_PROPERTY("Restart_NF_ops", 1); + TC_PROPERTY("Restart_NF_type", 2); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NR1", + "DICT ops during node startup (not master)"){ + TC_PROPERTY("Restart_NR_ops", 1); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NR2", + "DICT ops during node startup with crash inserts (not master)"){ + TC_PROPERTY("Restart_NR_ops", 1); + TC_PROPERTY("Restart_NR_error", 1); + STEP(runRestarts); + STEP(runDictOps); +} + NDBT_TESTSUITE_END(testDict); int main(int argc, const char** argv){ |