diff options
Diffstat (limited to 'ndb')
-rw-r--r-- | ndb/include/kernel/signaldata/DictLock.hpp | 4 | ||||
-rw-r--r-- | ndb/include/ndb_version.h.in | 2 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/ERROR_codes.txt | 6 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdict/Dbdict.cpp | 101 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdict/Dbdict.hpp | 5 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdict/DictLock.txt | 94 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 46 | ||||
-rw-r--r-- | ndb/test/ndbapi/testDict.cpp | 67 |
8 files changed, 278 insertions, 47 deletions
diff --git a/ndb/include/kernel/signaldata/DictLock.hpp b/ndb/include/kernel/signaldata/DictLock.hpp index c8f919f65a8..3e29d762962 100644 --- a/ndb/include/kernel/signaldata/DictLock.hpp +++ b/ndb/include/kernel/signaldata/DictLock.hpp @@ -55,7 +55,9 @@ public: enum ErrorCode { NotMaster = 1, InvalidLockType = 2, - TooManyRequests = 3 + BadUserRef = 3, + TooLate = 4, + TooManyRequests = 5 }; private: Uint32 userPtr; diff --git a/ndb/include/ndb_version.h.in b/ndb/include/ndb_version.h.in index 38b72306d03..7e878803f46 100644 --- a/ndb/include/ndb_version.h.in +++ b/ndb/include/ndb_version.h.in @@ -60,5 +60,7 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; #define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) #define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) +#define NDBD_DICT_LOCK_VERSION_5 MAKE_VERSION(5,0,23) + #endif diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index a63c1bef915..ddb99cb6b56 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4013 Next DBLQH 5043 Next DBDICT 6007 -Next DBDIH 7175 +Next DBDIH 7177 Next DBTC 8037 Next CMVMI 9000 Next BACKUP 10022 @@ -312,7 +312,9 @@ Test Crashes in handling node restarts 7170: Crash when receiving START_PERMREF (InitialStartRequired) -7174: Send one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR) +7174: Crash starting node before sending DICT_LOCK_REQ +7175: Master sends one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR) +7176: Slave NR pretends master does not support DICT lock (rolling upgrade) DICT: 6000 Crash during NR when receiving DICTSTARTREQ diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index 73007bd9aad..3cdba251492 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -205,7 +205,7 @@ void Dbdict::execCONTINUEB(Signal* signal) case ZDICT_LOCK_POLL: jam(); - checkDictLockQueue(signal); + checkDictLockQueue(signal, true); break; default : @@ -2836,7 +2836,6 @@ void Dbdict::execNODE_FAILREP(Signal* signal) case BS_NODE_RESTART: jam(); ok = true; - removeStaleDictLocks(signal, theFailedNodes); break; } ndbrequire(ok); @@ -2860,6 +2859,15 @@ void Dbdict::execNODE_FAILREP(Signal* signal) }//if }//for + /* + * NODE_FAILREP guarantees that no "in flight" signal from + * a dead node is accepted, and also that the job buffer contains + * no such (un-executed) signals. Therefore no DICT_UNLOCK_ORD + * from a dead node (leading to master crash) is possible after + * this clean-up removes the lock record. + */ + removeStaleDictLocks(signal, theFailedNodes); + }//execNODE_FAILREP() @@ -12210,7 +12218,7 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask) const Dbdict::DictLockType* Dbdict::getDictLockType(Uint32 lockType) { - static DictLockType lt[] = { + static const DictLockType lt[] = { { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" } }; for (int i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) { @@ -12221,11 +12229,39 @@ Dbdict::getDictLockType(Uint32 lockType) } void +Dbdict::sendDictLockInfoEvent(Uint32 pollCount) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + unsigned count = 0; + + char queue_buf[100]; + char *p = &queue_buf[0]; + const char *const q = &queue_buf[sizeof(queue_buf)]; + *p = 0; + + while (loopPtr.i != RNIL) { + jam(); + my_snprintf(p, q-p, "%s%u%s", + ++count == 1 ? "" : " ", + (unsigned)refToNode(loopPtr.p->req.userRef), + loopPtr.p->locked ? "L" : ""); + p += strlen(p); + c_dictLockQueue.next(loopPtr); + } + + infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s", + (int)c_blockState, + c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(), + c_dictLockPoll, (int)pollCount, queue_buf); +} + +void Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text) { infoEvent("DICT: %s %u for %s", text, - (unsigned int)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text); + (unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text); } void @@ -12234,6 +12270,8 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal) jamEntry(); const DictLockReq* req = (const DictLockReq*)&signal->theData[0]; + // make sure bad request crashes slave, not master (us) + if (getOwnNodeId() != c_masterNodeId) { jam(); sendDictLockRef(signal, *req, DictLockRef::NotMaster); @@ -12247,6 +12285,19 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal) return; } + if (req->userRef != signal->getSendersBlockRef() || + getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::BadUserRef); + return; + } + + if (c_aliveNodes.get(refToNode(req->userRef))) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooLate); + return; + } + DictLockPtr lockPtr; if (! c_dictLockQueue.seize(lockPtr)) { jam(); @@ -12258,21 +12309,23 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal) lockPtr.p->locked = false; lockPtr.p->lt = lt; - checkDictLockQueue(signal); + checkDictLockQueue(signal, false); if (! lockPtr.p->locked) sendDictLockInfoEvent(lockPtr, "lock request by node"); } void -Dbdict::checkDictLockQueue(Signal* signal) +Dbdict::checkDictLockQueue(Signal* signal, bool poll) { + Uint32 pollCount = ! poll ? 0 : signal->theData[1]; + DictLockPtr lockPtr; do { if (! c_dictLockQueue.first(lockPtr)) { jam(); - setDictLockPoll(signal, false); + setDictLockPoll(signal, false, pollCount); return; } @@ -12299,7 +12352,7 @@ Dbdict::checkDictLockQueue(Signal* signal) // this routine is called again when it is removed for any reason bool on = ! lockPtr.p->locked; - setDictLockPoll(signal, on); + setDictLockPoll(signal, on, pollCount); } void @@ -12326,7 +12379,7 @@ Dbdict::execDICT_UNLOCK_ORD(Signal* signal) c_dictLockQueue.release(lockPtr); - checkDictLockQueue(signal); + checkDictLockQueue(signal, false); } void @@ -12359,21 +12412,32 @@ Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode) // control polling void -Dbdict::setDictLockPoll(Signal* signal, bool on) +Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount) { if (on) { jam(); signal->theData[0] = ZDICT_LOCK_POLL; - sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1); + signal->theData[1] = pollCount + 1; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2); } - if (c_dictLockPoll != on) { + bool change = (c_dictLockPoll != on); + + if (change) { jam(); -#ifdef VM_TRACE - infoEvent("DICT: lock polling %s", on ? "On" : "Off"); -#endif c_dictLockPoll = on; } + + // avoid too many messages if master is stuck busy (BS_NODE_FAILURE) + bool periodic = + pollCount < 8 || + pollCount < 64 && pollCount % 8 == 0 || + pollCount < 512 && pollCount % 64 == 0 || + pollCount < 4096 && pollCount % 512 == 0 || + pollCount % 4096 == 0; // about every 6 minutes + + if (change || periodic) + sendDictLockInfoEvent(pollCount); } // NF handling @@ -12384,6 +12448,11 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) DictLockPtr loopPtr; c_dictLockQueue.first(loopPtr); + if (getOwnNodeId() != c_masterNodeId) { + ndbrequire(loopPtr.i == RNIL); + return; + } + while (loopPtr.i != RNIL) { jam(); DictLockPtr lockPtr = loopPtr; @@ -12409,7 +12478,7 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) } } - checkDictLockQueue(signal); + checkDictLockQueue(signal, false); } diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index fbad67d8822..9c0bf65b69c 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -1804,14 +1804,15 @@ private: bool c_dictLockPoll; static const DictLockType* getDictLockType(Uint32 lockType); + void sendDictLockInfoEvent(Uint32 pollCount); void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text); - void checkDictLockQueue(Signal* signal); + void checkDictLockQueue(Signal* signal, bool poll); void sendDictLockConf(Signal* signal, DictLockPtr lockPtr); void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode); // control polling i.e. continueB loop - void setDictLockPoll(Signal* signal, bool on); + void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount); // NF handling void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes); diff --git a/ndb/src/kernel/blocks/dbdict/DictLock.txt b/ndb/src/kernel/blocks/dbdict/DictLock.txt new file mode 100644 index 00000000000..17f24119e9d --- /dev/null +++ b/ndb/src/kernel/blocks/dbdict/DictLock.txt @@ -0,0 +1,94 @@ +Lock master DICT against schema operations + +Implementation +-------------- + +[ see comments in Dbdict.hpp ] + +Use case: Node startup INR / NR +------------------------------- + +Master DICT (like any block) keeps list of alive nodes (c_aliveNodes). +These are participants in schema ops. + +(1) c_aliveNodes is initialized when DICT starts + in sp3 in READ_NODESCONF from CNTR + +(2) when slave node fails (in any sp of the slave node) + it is removed from c_aliveNodes in NODE_FAILREP + +(3) when slave starts, it is added to c_aliveNodes + in sp4 of the starting node in INCL_NODEREQ + +Slave DIH locks master DICT in sp2 and releases the lock when started. +Based on the constraints: + +- the lock is taken when master DICT is known + DIH reads this in sp2 in READ_NODESCONF + +- the lock is taken before (3) + +- the lock is taken before copying starts and held until it is done + in sp4 DIH meta, DICT meta, tuple data + +- on INR in sp2 in START_PERMREQ the LCP info of the slave is erased + in all DIH in invalidateNodeLCP() - not safe under schema ops + +Signals: + +All but DICT_LOCK are standard v5.0 signals. +s=starting node, m=master, a=all participants, l=local block. + +* sp2 - DICT_LOCK and START_PERM + +DIH/s + DICT_LOCK_REQ + DICT/m + DICT_LOCK_CONF +DIH/s + START_PERMREQ + DIH/m + START_INFOREQ + DIH/a + invalidateNodeLCP() if INR + DIH/a + START_INFOCONF + DIH/m + START_PERMCONF +DIH/s + +* sp4 - START_ME (copy metadata, no changes) + +DIH/s + START_MEREQ + DIH/m + COPY_TABREQ + DIH/s + COPY_TABCONF + DIH/m + DICTSTARTREQ + DICT/s + GET_SCHEMA_INFOREQ + DICT/m + SCHEMA_INFO + DICT/s + DICTSTARTCONF + DIH/m + INCL_NODEREQ + DIH/a + INCL_NODEREQ + ANY/l + INCL_NODECONF + DIH/a + INCL_NODECONF + DIH/m + START_MECONF +DIH/s + +* sp7 - release DICT lock + +DIH/s + DICT_UNLOCK_ORD + DICT/m + +# vim: set et sw=4: diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index c37461a1f65..352053bef10 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -1594,6 +1594,9 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal) */ ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + // check that we are not yet taking part in schema ops + CRASH_INSERTION(7174); + Uint32 lockType = DictLockReq::NodeRestartLock; Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 }; sendDictLockReq(signal, lockType, c); @@ -1746,7 +1749,7 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) ndbrequire(refToNode(retRef) == nodeId); if ((c_nodeStartMaster.activeState) || (c_nodeStartMaster.wait != ZFALSE) || - ERROR_INSERTED_CLEAR(7174)) { + ERROR_INSERTED_CLEAR(7175)) { jam(); signal->theData[0] = nodeId; signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; @@ -14709,6 +14712,34 @@ Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c) lockPtr.p->locked = false; lockPtr.p->callback = c; + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + unsigned int get_minor = getMinor(masterVersion); + unsigned int get_build = getBuild(masterVersion); + + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + jam(); + + infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u", + (unsigned int)cmasterNodeId, get_major, get_minor, get_build); + + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + conf->userPtr = lockPtr.i; + conf->lockType = lockType; + conf->lockPtr = ZNIL; + + sendSignal(reference(), GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); + return; + } + } + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal, DictLockReq::SignalLength, JBB); @@ -14758,6 +14789,19 @@ Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI) c_dictLockSlavePool.release(lockPtr); + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + return; + } + } + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, DictUnlockOrd::SignalLength, JBB); diff --git a/ndb/test/ndbapi/testDict.cpp b/ndb/test/ndbapi/testDict.cpp index 2cfb78f143e..397f41b3d4e 100644 --- a/ndb/test/ndbapi/testDict.cpp +++ b/ndb/test/ndbapi/testDict.cpp @@ -1590,17 +1590,18 @@ recv_dict_ops_run(NDBT_Context* ctx) int runRestarts(NDBT_Context* ctx, NDBT_Step* step) { - static int err_master[] = { // non-crashing - 0, - 7174 // send one fake START_PERMREF + static int errlst_master[] = { // non-crashing + 7175, // send one fake START_PERMREF + 0 }; - static int err_node[] = { - 0, - 7121, // crash on START_PERMCONF - 7130 // crash on START_MECONF + static int errlst_node[] = { + 7174, // crash before sending DICT_LOCK_REQ + 7176, // pretend master does not support DICT lock + 7121, // crash at receive START_PERMCONF + 0 }; - const uint err_master_cnt = sizeof(err_master)/sizeof(err_master[0]); - const uint err_node_cnt = sizeof(err_node)/sizeof(err_node[0]); + const uint errcnt_master = sizeof(errlst_master)/sizeof(errlst_master[0]); + const uint errcnt_node = sizeof(errlst_node)/sizeof(errlst_node[0]); myRandom48Init(NdbTick_CurrentMillisecond()); NdbRestarter restarter; @@ -1632,7 +1633,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) nodeIdList[nodeIdCnt++] = nodeId; } - if (numnodes >= 4) { + if (numnodes >= 4 && myRandom48(2) == 0) { int rand = myRandom48(numnodes); int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand); CHECK(nodeId != -1); @@ -1642,6 +1643,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl; + const uint timeout = 60; //secs for node wait const unsigned maxsleep = 2000; //ms bool NF_ops = ctx->getProperty("Restart_NF_ops"); @@ -1655,9 +1657,8 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) NdbSleep_MilliSleep(myRandom48(maxsleep)); { - int i = 0; - while (i < nodeIdCnt) { - int nodeId = nodeIdList[i++]; + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; bool nostart = true; bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2); @@ -1676,9 +1677,31 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) } g_info << "1: wait for nostart" << endl; - CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt) == 0); + CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt, timeout) == 0); NdbSleep_MilliSleep(myRandom48(maxsleep)); + int err_master = 0; + int err_node[2] = { 0, 0 }; + + if (NR_error) { + err_master = errlst_master[l % errcnt_master]; + + // limitation: cannot have 2 node restarts and crash_insert + // one node may die for real (NF during startup) + + for (int i = 0; i < nodeIdCnt && nodeIdCnt == 1; i++) { + err_node[i] = errlst_node[l % errcnt_node]; + + // 7176 - no DICT lock protection + + if (err_node[i] == 7176) { + g_info << "1: no dict ops due to error insert " + << err_node[i] << endl; + NR_ops = false; + } + } + } + g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl; if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2)) break; @@ -1689,23 +1712,17 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) if (NR_error) { { - int rand = myRandom48(err_master_cnt); - int err = err_master[rand]; + int err = err_master; if (err != 0) { g_info << "1: insert master error " << err << endl; CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0); } } - // limitation: cannot have 2 node restarts and crash_insert - // one node may die for real (NF during startup) + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; - int i = 0; - while (i < nodeIdCnt && nodeIdCnt == 1) { - int nodeId = nodeIdList[i++]; - - int rand = myRandom48(err_node_cnt); - int err = err_node[rand]; + int err = err_node[i]; if (err != 0) { g_info << "1: insert node " << nodeId << " error " << err << endl; CHECK(restarter.insertErrorInNode(nodeId, err) == 0); @@ -1715,7 +1732,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) NdbSleep_MilliSleep(myRandom48(maxsleep)); g_info << "1: wait cluster started" << endl; - CHECK(restarter.waitClusterStarted() == 0); + CHECK(restarter.waitClusterStarted(timeout) == 0); NdbSleep_MilliSleep(myRandom48(maxsleep)); g_info << "1: restart done" << endl; |