diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-03-20 14:53:29 +0100 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-03-20 14:53:29 +0100 |
commit | d230d0e1e6c7aa92bd6afabee378746d9d46c340 (patch) | |
tree | a2927f85b2f13543e609b8ebdb7c1e8d8bca13f3 /ndb | |
parent | 51a093f18762d299899c7c9e5cb0a2a639631720 (diff) | |
download | mariadb-git-d230d0e1e6c7aa92bd6afabee378746d9d46c340.tar.gz |
ndb - wl2610, bug#18352
Remove useless and tricky state fiddling in TC
to synchronize NF_CompleteRep, as code is already present in DIH as well
Keep broadcast of TAKE_OVERTCCONF for online upgrade
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
Add clever dump for showing active operations
ndb/src/kernel/blocks/dbtc/Dbtc.hpp:
Remove useless and tricky state fiddling in TC
to synchronize NF_CompleteRep, as code is already present in DIH as well
Keep broadcast of TAKE_OVERTCCONF for online upgrade
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
Remove useless and tricky state fiddling in TC
to synchronize NF_CompleteRep, as code is already present in DIH as well
Keep broadcast of TAKE_OVERTCCONF for online upgrade
Diffstat (limited to 'ndb')
-rw-r--r-- | ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 166 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbtc/Dbtc.hpp | 9 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 186 |
3 files changed, 208 insertions, 153 deletions
diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index ff7e3c32924..0aeeaccd55e 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -18448,6 +18448,172 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) c_error_insert_table_id = dumpState->args[1]; SET_ERROR_INSERT_VALUE(5042); } + + TcConnectionrec *regTcConnectionrec = tcConnectionrec; + Uint32 ttcConnectrecFileSize = ctcConnectrecFileSize; + Uint32 arg = dumpState->args[0]; + if(arg == 2306) + { + for(Uint32 i = 0; i<1024; i++) + { + TcConnectionrecPtr tcRec; + tcRec.i = ctransidHash[i]; + while(tcRec.i != RNIL) + { + ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec); + ndbout << "TcConnectionrec " << tcRec.i; + signal->theData[0] = 2307; + signal->theData[1] = tcRec.i; + execDUMP_STATE_ORD(signal); + tcRec.i = tcRec.p->nextHashRec; + } + } + } + + if(arg == 2307 || arg == 2308) + { + TcConnectionrecPtr tcRec; + tcRec.i = signal->theData[1]; + ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec); + + ndbout << " transactionState = " << tcRec.p->transactionState<<endl; + ndbout << " operation = " << tcRec.p->operation<<endl; + ndbout << " tcNodeFailrec = " << tcRec.p->tcNodeFailrec + << " seqNoReplica = " << tcRec.p->seqNoReplica + << " simpleRead = " << tcRec.p->simpleRead + << endl; + ndbout << " replicaType = " << tcRec.p->replicaType + << " reclenAiLqhkey = " << tcRec.p->reclenAiLqhkey + << " opExec = " << tcRec.p->opExec + << endl; + ndbout << " opSimple = " << tcRec.p->opSimple + << " nextSeqNoReplica = " << tcRec.p->nextSeqNoReplica + << " lockType = " << tcRec.p->lockType + << endl; + ndbout << " lastReplicaNo = " << tcRec.p->lastReplicaNo + << " indTakeOver = " << tcRec.p->indTakeOver + << " dirtyOp = " << tcRec.p->dirtyOp + << endl; + ndbout << " activeCreat = " << tcRec.p->activeCreat + << " tcBlockref = " << hex << tcRec.p->tcBlockref + << " reqBlockref = " << hex << 
tcRec.p->reqBlockref + << " primKeyLen = " << tcRec.p->primKeyLen + << endl; + ndbout << " nextReplica = " << tcRec.p->nextReplica + << " tcBlockref = " << hex << tcRec.p->tcBlockref + << " reqBlockref = " << hex << tcRec.p->reqBlockref + << " primKeyLen = " << tcRec.p->primKeyLen + << endl; + ndbout << " logStopPageNo = " << tcRec.p->logStopPageNo + << " logStartPageNo = " << tcRec.p->logStartPageNo + << " logStartPageIndex = " << tcRec.p->logStartPageIndex + << endl; + ndbout << " errorCode = " << tcRec.p->errorCode + << " clientBlockref = " << hex << tcRec.p->clientBlockref + << " applRef = " << hex << tcRec.p->applRef + << " totSendlenAi = " << tcRec.p->totSendlenAi + << endl; + ndbout << " totReclenAi = " << tcRec.p->totReclenAi + << " tcScanRec = " << tcRec.p->tcScanRec + << " tcScanInfo = " << tcRec.p->tcScanInfo + << " tcOprec = " << hex << tcRec.p->tcOprec + << endl; + ndbout << " tableref = " << tcRec.p->tableref + << " simpleTcConnect = " << tcRec.p->simpleTcConnect + << " storedProcId = " << tcRec.p->storedProcId + << " schemaVersion = " << tcRec.p->schemaVersion + << endl; + ndbout << " reqinfo = " << tcRec.p->reqinfo + << " reqRef = " << tcRec.p->reqRef + << " readlenAi = " << tcRec.p->readlenAi + << " prevTc = " << tcRec.p->prevTc + << endl; + ndbout << " prevLogTcrec = " << tcRec.p->prevLogTcrec + << " prevHashRec = " << tcRec.p->prevHashRec + << " nodeAfterNext0 = " << tcRec.p->nodeAfterNext[0] + << " nodeAfterNext1 = " << tcRec.p->nodeAfterNext[1] + << endl; + ndbout << " nextTcConnectrec = " << tcRec.p->nextTcConnectrec + << " nextTc = " << tcRec.p->nextTc + << " nextTcLogQueue = " << tcRec.p->nextTcLogQueue + << " nextLogTcrec = " << tcRec.p->nextLogTcrec + << endl; + ndbout << " nextHashRec = " << tcRec.p->nextHashRec + << " logWriteState = " << tcRec.p->logWriteState + << " logStartFileNo = " << tcRec.p->logStartFileNo + << " listState = " << tcRec.p->listState + << endl; + ndbout << " lastAttrinbuf = " << tcRec.p->lastAttrinbuf + << " 
lastTupkeybuf = " << tcRec.p->lastTupkeybuf + << " hashValue = " << tcRec.p->hashValue + << endl; + ndbout << " gci = " << tcRec.p->gci + << " fragmentptr = " << tcRec.p->fragmentptr + << " fragmentid = " << tcRec.p->fragmentid + << " firstTupkeybuf = " << tcRec.p->firstTupkeybuf + << endl; + ndbout << " firstAttrinbuf = " << tcRec.p->firstAttrinbuf + << " currTupAiLen = " << tcRec.p->currTupAiLen + << " currReclenAi = " << tcRec.p->currReclenAi + << endl; + ndbout << " tcTimer = " << tcRec.p->tcTimer + << " clientConnectrec = " << tcRec.p->clientConnectrec + << " applOprec = " << hex << tcRec.p->applOprec + << " abortState = " << tcRec.p->abortState + << endl; + ndbout << " transid0 = " << hex << tcRec.p->transid[0] + << " transid1 = " << hex << tcRec.p->transid[1] + << " tupkeyData0 = " << tcRec.p->tupkeyData[0] + << " tupkeyData1 = " << tcRec.p->tupkeyData[1] + << endl; + ndbout << " tupkeyData2 = " << tcRec.p->tupkeyData[2] + << " tupkeyData3 = " << tcRec.p->tupkeyData[3] + << endl; + switch (tcRec.p->transactionState) { + + case TcConnectionrec::SCAN_STATE_USED: + if (tcRec.p->tcScanRec < cscanrecFileSize){ + ScanRecordPtr TscanPtr; + c_scanRecordPool.getPtr(TscanPtr, tcRec.p->tcScanRec); + ndbout << " scanState = " << TscanPtr.p->scanState << endl; + //TscanPtr.p->scanLocalref[2]; + ndbout << " copyPtr="<<TscanPtr.p->copyPtr + << " scanAccPtr="<<TscanPtr.p->scanAccPtr + << " scanAiLength="<<TscanPtr.p->scanAiLength + << endl; + ndbout << " m_curr_batch_size_rows="<< + TscanPtr.p->m_curr_batch_size_rows + << " m_max_batch_size_rows="<< + TscanPtr.p->m_max_batch_size_rows + << " scanErrorCounter="<<TscanPtr.p->scanErrorCounter + << endl; + ndbout << " scanSchemaVersion="<<TscanPtr.p->scanSchemaVersion + << " scanStoredProcId="<<TscanPtr.p->scanStoredProcId + << " scanTcrec="<<TscanPtr.p->scanTcrec + << endl; + ndbout << " scanType="<<TscanPtr.p->scanType + << " scanApiBlockref="<<TscanPtr.p->scanApiBlockref + << " scanNodeId="<<TscanPtr.p->scanNodeId + << " 
scanCompletedStatus="<<TscanPtr.p->scanCompletedStatus + << endl; + ndbout << " scanFlag="<<TscanPtr.p->scanFlag + << " scanLockHold="<<TscanPtr.p->scanLockHold + << " scanLockMode="<<TscanPtr.p->scanLockMode + << " scanNumber="<<TscanPtr.p->scanNumber + << endl; + ndbout << " scanReleaseCounter="<<TscanPtr.p->scanReleaseCounter + << " scanTcWaiting="<<TscanPtr.p->scanTcWaiting + << " scanKeyinfoFlag="<<TscanPtr.p->scanKeyinfoFlag + << endl; + } else{ + ndbout << "No connected scan record found" << endl; + } + break; + default: + break; + } + ndbrequire(arg != 2308); + } }//Dblqh::execDUMP_STATE_ORD() diff --git a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp index 23c5a7d08eb..b1332a4fd0b 100644 --- a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp +++ b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp @@ -211,14 +211,6 @@ public: LTS_ACTIVE = 1 }; - enum TakeOverState { - TOS_NOT_DEFINED = 0, - TOS_IDLE = 1, - TOS_ACTIVE = 2, - TOS_COMPLETED = 3, - TOS_NODE_FAILED = 4 - }; - enum FailState { FS_IDLE = 0, FS_LISTENING = 1, @@ -933,7 +925,6 @@ public: struct HostRecord { HostState hostStatus; LqhTransState lqhTransStatus; - TakeOverState takeOverStatus; bool inPackedList; UintR noOfPackedWordsLqh; UintR packedWordsLqh[26]; diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index 4750a8c388a..ff9b279592c 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -303,7 +303,6 @@ void Dbtc::execINCL_NODEREQ(Signal* signal) hostptr.i = signal->theData[1]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); hostptr.p->hostStatus = HS_ALIVE; - hostptr.p->takeOverStatus = TOS_IDLE; signal->theData[0] = cownref; c_alive_nodes.set(hostptr.i); sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB); @@ -856,8 +855,6 @@ void Dbtc::execREAD_NODESCONF(Signal* signal) hostptr.i = i; ptrCheckGuard(hostptr, chostFilesize, hostRecord); - hostptr.p->takeOverStatus = TOS_IDLE; - if 
(NodeBitmask::get(readNodes->inactiveNodes, i)) { jam(); hostptr.p->hostStatus = HS_DEAD; @@ -6826,21 +6823,27 @@ void Dbtc::execNODE_FAILREP(Signal* signal) const Uint32 tnewMasterId = nodeFail->masterNodeId; arrGuard(tnoOfNodes, MAX_NDB_NODES); + Uint32 i; int index = 0; - for (unsigned i = 1; i< MAX_NDB_NODES; i++) { - if(NodeBitmask::get(nodeFail->theNodes, i)){ + for (i = 1; i< MAX_NDB_NODES; i++) + { + if(NodeBitmask::get(nodeFail->theNodes, i)) + { cdata[index] = i; index++; }//if }//for + cmasterNodeId = tnewMasterId; + tcNodeFailptr.i = 0; ptrAss(tcNodeFailptr, tcFailRecord); - Uint32 tindex; - for (tindex = 0; tindex < tnoOfNodes; tindex++) { + for (i = 0; i < tnoOfNodes; i++) + { jam(); - hostptr.i = cdata[tindex]; + hostptr.i = cdata[i]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); + /*------------------------------------------------------------*/ /* SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS */ /* FAILED. */ @@ -6849,30 +6852,15 @@ void Dbtc::execNODE_FAILREP(Signal* signal) hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS; c_alive_nodes.clear(hostptr.i); - if (hostptr.p->takeOverStatus == TOS_COMPLETED) { - jam(); - /*------------------------------------------------------------*/ - /* A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/ - /* EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT. */ - /* HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE. */ - /*------------------------------------------------------------*/ - /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */ - /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */ - /* USED THEM IS COMPLETED. 
*/ - /*------------------------------------------------------------*/ - hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER; - } else { - ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE); - hostptr.p->takeOverStatus = TOS_NODE_FAILED; - }//if - - if (tcNodeFailptr.p->failStatus == FS_LISTENING) { + if (tcNodeFailptr.p->failStatus == FS_LISTENING) + { jam(); /*------------------------------------------------------------*/ /* THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE */ /* FAILURE. */ /*------------------------------------------------------------*/ - if (hostptr.p->lqhTransStatus == LTS_ACTIVE) { + if (hostptr.p->lqhTransStatus == LTS_ACTIVE) + { jam(); /*------------------------------------------------------------*/ /* WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */ @@ -6884,78 +6872,25 @@ void Dbtc::execNODE_FAILREP(Signal* signal) }//if }//if - }//for - - const bool masterFailed = (cmasterNodeId != tnewMasterId); - cmasterNodeId = tnewMasterId; - - if(getOwnNodeId() == cmasterNodeId && masterFailed){ - /** - * Master has failed and I'm the new master - */ - jam(); - - for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) { + if (getOwnNodeId() != tnewMasterId) + { jam(); - ptrAss(hostptr, hostRecord); - if (hostptr.p->hostStatus != HS_ALIVE) { - jam(); - if (hostptr.p->takeOverStatus == TOS_COMPLETED) { - jam(); - /*------------------------------------------------------------*/ - /* SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF */ - /* TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE */ - /* THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE */ - /* OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF */ - /* MASTER FAILS AFTER SENDING CONFIRMATION TO NEW */ - /* MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE */ - /* WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES */ - /* MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */ - /* CRASHED NODE HAVE ALREADY RECOVERED. 
*/ - /*------------------------------------------------------------*/ - NodeReceiverGroup rg(DBTC, c_alive_nodes); - signal->theData[0] = hostptr.i; - sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB); - }//if - }//if - }//for - } - - if(getOwnNodeId() == cmasterNodeId){ - jam(); - for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) { + /** + * Only master does takeover currently + */ + hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER; + } + else + { jam(); - ptrAss(hostptr, hostRecord); - if (hostptr.p->hostStatus != HS_ALIVE) { - jam(); - if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) { - jam(); - /*------------------------------------------------------------*/ - /* CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL */ - /* SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/ - /* BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/ - /*------------------------------------------------------------*/ - hostptr.p->takeOverStatus = TOS_ACTIVE; - signal->theData[0] = hostptr.i; - sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB); - }//if - }//if - }//for - }//if - for (tindex = 0; tindex < tnoOfNodes; tindex++) { - jam(); - hostptr.i = cdata[tindex]; - ptrCheckGuard(hostptr, chostFilesize, hostRecord); - /*------------------------------------------------------------*/ - /* LOOP THROUGH AND ABORT ALL SCANS THAT WHERE */ - /* CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED */ - /* NODE'S LQH */ - /*------------------------------------------------------------*/ + signal->theData[0] = hostptr.i; + sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB); + } + checkScanActiveInFailedLqh(signal, 0, hostptr.i); checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid nodeFailCheckTransactions(signal, 0, hostptr.i); - }//for - + } }//Dbtc::execNODE_FAILREP() void @@ -7071,47 +7006,17 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal) tfailedNodeId = signal->theData[0]; hostptr.i = tfailedNodeId; ptrCheckGuard(hostptr, chostFilesize, 
hostRecord); - switch (hostptr.p->takeOverStatus) { - case TOS_IDLE: - jam(); - /*------------------------------------------------------------*/ - /* THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP */ - /* MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS. */ - /* WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT */ - /* FOR THE NODE_FAILREP MESSAGE. */ - /*------------------------------------------------------------*/ - hostptr.p->takeOverStatus = TOS_COMPLETED; - break; - case TOS_NODE_FAILED: - case TOS_ACTIVE: - jam(); - /*------------------------------------------------------------*/ - /* WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE */ - /* ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH */ - /* WE SET THE STATE TO TAKE_OVER_COMPLETED. */ - /*------------------------------------------------------------*/ - /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */ - /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */ - /* USED THEM IS COMPLETED. */ - /*------------------------------------------------------------*/ - hostptr.p->takeOverStatus = TOS_COMPLETED; - checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER); - break; - case TOS_COMPLETED: - jam(); - /*------------------------------------------------------------*/ - /* WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */ - /* LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF */ - /* THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */ - /* WE SIMPLY IGNORE THE MESSAGE. 
*/ - /*------------------------------------------------------------*/ - /*empty*/; - break; - default: + + ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)", + tfailedNodeId, signal->getSendersBlockRef(), reference()); + if (signal->getSendersBlockRef() != reference()) + { jam(); - systemErrorLab(signal); return; - }//switch + } + + + checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER); }//Dbtc::execTAKE_OVERTCCONF() void Dbtc::execTAKE_OVERTCREQ(Signal* signal) @@ -7351,16 +7256,10 @@ void Dbtc::completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd) /* TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL */ /* NODES THAT ARE ALIVE. */ /*------------------------------------------------------------*/ - for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) { - jam(); - ptrAss(hostptr, hostRecord); - if (hostptr.p->hostStatus == HS_ALIVE) { - jam(); - tblockref = calcTcBlockRef(hostptr.i); - signal->theData[0] = tcNodeFailptr.p->takeOverNode; - sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB); - }//if - }//for + NodeReceiverGroup rg(DBTC, c_alive_nodes); + signal->theData[0] = tcNodeFailptr.p->takeOverNode; + sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB); + if (tcNodeFailptr.p->queueIndex > 0) { jam(); /*------------------------------------------------------------*/ @@ -9937,7 +9836,6 @@ void Dbtc::inithost(Signal* signal) ptrAss(hostptr, hostRecord); hostptr.p->hostStatus = HS_DEAD; hostptr.p->inPackedList = false; - hostptr.p->takeOverStatus = TOS_NOT_DEFINED; hostptr.p->lqhTransStatus = LTS_IDLE; hostptr.p->noOfWordsTCKEYCONF = 0; hostptr.p->noOfWordsTCINDXCONF = 0; |