diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-03-22 11:44:31 +0100 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-03-22 11:44:31 +0100 |
commit | 19340f2242443ec54101d7fd518be47211ed0f15 (patch) | |
tree | f085354e9003c9a1df81f52da01b55b03754a54d | |
parent | 058019f66cdfefcdaa179f011c9c9a10ee0d10df (diff) | |
download | mariadb-git-19340f2242443ec54101d7fd518be47211ed0f15.tar.gz |
ndb - bug#18414
Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/src/kernel/blocks/ERROR_codes.txt:
New error code 8050: send ZABORT_TIMEOUT_BREAK delayed
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
Remove dumping of LCP info (DUMP_STATE_ORD 7015) in MASTER_LCPhandling during NF
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/test/ndbapi/testNodeRestart.cpp:
Add testcase for bug18414
ndb/test/ndbapi/testTimeout.cpp:
Fix error code checking: expect any error classified as TimeoutExpired instead of the literal code 237
ndb/test/run-test/daily-basic-tests.txt:
Add testcase for bug18414
-rw-r--r-- | ndb/src/kernel/blocks/ERROR_codes.txt | 2 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 4 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 52 | ||||
-rw-r--r-- | ndb/test/ndbapi/testNodeRestart.cpp | 73 | ||||
-rw-r--r-- | ndb/test/ndbapi/testTimeout.cpp | 7 | ||||
-rw-r--r-- | ndb/test/run-test/daily-basic-tests.txt | 4 |
6 files changed, 128 insertions, 14 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index e5576450846..b4c5d1b1d7e 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -226,6 +226,8 @@ Delay execution of COMPLETECONF signal 2 seconds to generate time-out. 8045: (ABORTCONF only as part of take-over) Delay execution of ABORTCONF signal 2 seconds to generate time-out. +8050: Send ZABORT_TIMEOUT_BREAK delayed + ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC ------------------------------------------------- diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index a8633af2529..de35ce5c275 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -5982,10 +5982,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) signal->theData[0] = 7012; execDUMP_STATE_ORD(signal); - signal->theData[0] = 7015; - signal->theData[1] = 0; - execDUMP_STATE_ORD(signal); - c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__); checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER); diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index ff9b279592c..4ca13bf433b 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -6386,6 +6386,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) return; } + bool found = false; OperationState tmp[16]; Uint32 TloopCount = 0; @@ -6393,7 +6394,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) jam(); if (tcConnectptr.i == RNIL) { jam(); - if (Tcheck == 0) { + +#ifdef VM_TRACE + ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d", + found, Tcheck, apiConnectptr.p->counter); +#endif + if (found || apiConnectptr.p->counter) + { + jam(); + /** + * We sent atleast one ABORT/ABORTED + * or ZABORT_TIMEOUT_BREAK is in job buffer + * wait for reception... 
+ */ + return; + } + + if (Tcheck == 1) + { + jam(); + releaseAbortResources(signal); + return; + } + + if (Tcheck == 0) + { jam(); /*------------------------------------------------------------------ * All nodes had already reported ABORTED for all tcConnect records. @@ -6402,9 +6427,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) *------------------------------------------------------------------*/ char buf[96]; buf[0] = 0; char buf2[96]; - BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:", - __LINE__, apiConnectptr.i); - for(Uint32 i = 0; i<TloopCount; i++){ + BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:", + __LINE__, apiConnectptr.i, + apiConnectptr.p->counter); + for(Uint32 i = 0; i<TloopCount; i++) + { BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]); BaseString::snprintf(buf, sizeof(buf), buf2); } @@ -6412,7 +6439,9 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) ndbout_c(buf); ndbrequire(false); releaseAbortResources(signal); + return; } + return; }//if TloopCount++; @@ -6427,7 +6456,16 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK; signal->theData[1] = tcConnectptr.i; signal->theData[2] = apiConnectptr.i; - sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); + if (ERROR_INSERTED(8050)) + { + ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)", + Tcheck, apiConnectptr.p->counter); + sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3); + } + else + { + sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); + } return; }//if ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); @@ -6450,7 +6488,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) jam(); if (tcConnectptr.p->tcNodedata[Ti] != 0) { TloopCount += 31; - Tcheck = 1; + found = true; hostptr.i = tcConnectptr.p->tcNodedata[Ti]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); if (hostptr.p->hostStatus == 
HS_ALIVE) { @@ -7007,8 +7045,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal) hostptr.i = tfailedNodeId; ptrCheckGuard(hostptr, chostFilesize, hostRecord); - ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)", - tfailedNodeId, signal->getSendersBlockRef(), reference()); if (signal->getSendersBlockRef() != reference()) { jam(); diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index eebd631af94..cc2998ff73a 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -581,6 +581,73 @@ runBug16772(NDBT_Context* ctx, NDBT_Step* step){ return ret ? NDBT_OK : NDBT_FAILED; } +int +runBug18414(NDBT_Context* ctx, NDBT_Step* step){ + + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Ndb* pNdb = GETNDB(step); + HugoOperations hugoOps(*ctx->getTab()); + HugoTransactions hugoTrans(*ctx->getTab()); + int loop = 0; + do + { + if(hugoOps.startTransaction(pNdb) != 0) + goto err; + + if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0) + goto err; + + if(hugoOps.execute_NoCommit(pNdb) != 0) + goto err; + + int node1 = hugoOps.getTransaction()->getConnectedNodeId(); + int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + if (node1 == -1 || node2 == -1) + break; + + if (loop & 1) + { + if (restarter.insertErrorInNode(node1, 8050)) + goto err; + } + + if (restarter.insertErrorInNode(node2, 5003)) + goto err; + + int res= hugoOps.execute_Rollback(pNdb); + + if (restarter.waitNodesNoStart(&node2, 1) != 0) + goto err; + + if (restarter.insertErrorInAllNodes(0)) + goto err; + + if (restarter.startNodes(&node2, 1) != 0) + goto err; + + if (restarter.waitClusterStarted() != 0) + goto err; + + if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0) + goto err; + + hugoOps.closeTransaction(pNdb); + + } while(++loop < 5); + + return NDBT_OK; + +err: + hugoOps.closeTransaction(pNdb); + return NDBT_FAILED; +} NDBT_TESTSUITE(testNodeRestart); 
TESTCASE("NoLoad", @@ -870,6 +937,12 @@ TESTCASE("Bug16772", "Test bug with restarting before NF handling is complete"){ STEP(runBug16772); } +TESTCASE("Bug18414", + "Test bug with NF during NR"){ + INITIALIZER(runLoadTable); + STEP(runBug18414); + FINALIZER(runClearTable); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/ndb/test/ndbapi/testTimeout.cpp b/ndb/test/ndbapi/testTimeout.cpp index 25392698642..957fcd1d1e7 100644 --- a/ndb/test/ndbapi/testTimeout.cpp +++ b/ndb/test/ndbapi/testTimeout.cpp @@ -173,8 +173,11 @@ int runTimeoutTrans(NDBT_Context* ctx, NDBT_Step* step){ NdbSleep_MilliSleep(sleep); // Expect that transaction has timed-out - CHECK(hugoOps.execute_Commit(pNdb) == 237); - + int ret = hugoOps.execute_Commit(pNdb); + CHECK(ret != 0); + NdbError err = pNdb->getNdbError(ret); + CHECK(err.classification == NdbError::TimeoutExpired); + } while(false); hugoOps.closeTransaction(pNdb); diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 0533d585a41..b11e4479a57 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -458,6 +458,10 @@ max-time: 500 cmd: testSystemRestart args: -n Bug18385 T1 +max-time: 500 +cmd: testNodeRestart +args: -n Bug18414 T1 + # OLD FLEX max-time: 500 cmd: flexBench |