summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: unknown <jonas@perch.ndb.mysql.com> 2006-03-22 11:44:31 +0100
committer: unknown <jonas@perch.ndb.mysql.com> 2006-03-22 11:44:31 +0100
commit: 19340f2242443ec54101d7fd518be47211ed0f15 (patch)
tree: f085354e9003c9a1df81f52da01b55b03754a54d
parent: 058019f66cdfefcdaa179f011c9c9a10ee0d10df (diff)
download: mariadb-git-19340f2242443ec54101d7fd518be47211ed0f15.tar.gz
ndb - bug#18414
Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding

ndb/src/kernel/blocks/ERROR_codes.txt:
  New error code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Remove dumping of LCP info during NF
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase for bug18414
ndb/test/ndbapi/testTimeout.cpp:
  Fix error code checking
ndb/test/run-test/daily-basic-tests.txt:
  Add testcase for bug18414
-rw-r--r-- ndb/src/kernel/blocks/ERROR_codes.txt | 2
-rw-r--r-- ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 4
-rw-r--r-- ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 52
-rw-r--r-- ndb/test/ndbapi/testNodeRestart.cpp | 73
-rw-r--r-- ndb/test/ndbapi/testTimeout.cpp | 7
-rw-r--r-- ndb/test/run-test/daily-basic-tests.txt | 4
6 files changed, 128 insertions, 14 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index e5576450846..b4c5d1b1d7e 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -226,6 +226,8 @@ Delay execution of COMPLETECONF signal 2 seconds to generate time-out.
8045: (ABORTCONF only as part of take-over)
Delay execution of ABORTCONF signal 2 seconds to generate time-out.
+8050: Send ZABORT_TIMEOUT_BREAK delayed
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index a8633af2529..de35ce5c275 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -5982,10 +5982,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
signal->theData[0] = 7012;
execDUMP_STATE_ORD(signal);
- signal->theData[0] = 7015;
- signal->theData[1] = 0;
- execDUMP_STATE_ORD(signal);
-
c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
index ff9b279592c..4ca13bf433b 100644
--- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
+++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
@@ -6386,6 +6386,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
return;
}
+ bool found = false;
OperationState tmp[16];
Uint32 TloopCount = 0;
@@ -6393,7 +6394,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
jam();
if (tcConnectptr.i == RNIL) {
jam();
- if (Tcheck == 0) {
+
+#ifdef VM_TRACE
+ ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
+ found, Tcheck, apiConnectptr.p->counter);
+#endif
+ if (found || apiConnectptr.p->counter)
+ {
+ jam();
+ /**
+ * We sent at least one ABORT/ABORTED
+ * or ZABORT_TIMEOUT_BREAK is in job buffer
+ * wait for reception...
+ */
+ return;
+ }
+
+ if (Tcheck == 1)
+ {
+ jam();
+ releaseAbortResources(signal);
+ return;
+ }
+
+ if (Tcheck == 0)
+ {
jam();
/*------------------------------------------------------------------
* All nodes had already reported ABORTED for all tcConnect records.
@@ -6402,9 +6427,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
*------------------------------------------------------------------*/
char buf[96]; buf[0] = 0;
char buf2[96];
- BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
- __LINE__, apiConnectptr.i);
- for(Uint32 i = 0; i<TloopCount; i++){
+ BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
+ __LINE__, apiConnectptr.i,
+ apiConnectptr.p->counter);
+ for(Uint32 i = 0; i<TloopCount; i++)
+ {
BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
BaseString::snprintf(buf, sizeof(buf), buf2);
}
@@ -6412,7 +6439,9 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
ndbout_c(buf);
ndbrequire(false);
releaseAbortResources(signal);
+ return;
}
+
return;
}//if
TloopCount++;
@@ -6427,7 +6456,16 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
signal->theData[1] = tcConnectptr.i;
signal->theData[2] = apiConnectptr.i;
- sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ if (ERROR_INSERTED(8050))
+ {
+ ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)",
+ Tcheck, apiConnectptr.p->counter);
+ sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
+ }
+ else
+ {
+ sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ }
return;
}//if
ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6450,7 +6488,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
jam();
if (tcConnectptr.p->tcNodedata[Ti] != 0) {
TloopCount += 31;
- Tcheck = 1;
+ found = true;
hostptr.i = tcConnectptr.p->tcNodedata[Ti];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -7007,8 +7045,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
- tfailedNodeId, signal->getSendersBlockRef(), reference());
if (signal->getSendersBlockRef() != reference())
{
jam();
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index eebd631af94..cc2998ff73a 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -581,6 +581,73 @@ runBug16772(NDBT_Context* ctx, NDBT_Step* step){
return ret ? NDBT_OK : NDBT_FAILED;
}
+int
+runBug18414(NDBT_Context* ctx, NDBT_Step* step){ // Regression test for bug#18414 (timeout during ABORT with ZABORT_TIMEOUT_BREAK outstanding); runs at most five iterations and returns NDBT_OK or NDBT_FAILED.
+
+ NdbRestarter restarter;
+ if (restarter.getNumDbNodes() < 2) // fewer than two data nodes: stop the test and report OK without running it
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+
+ Ndb* pNdb = GETNDB(step);
+ HugoOperations hugoOps(*ctx->getTab());
+ HugoTransactions hugoTrans(*ctx->getTab());
+ int loop = 0; // iteration counter; its low bit decides whether error 8050 is inserted
+ do
+ {
+ if(hugoOps.startTransaction(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0) // presumably updates 128 records starting at record 0 -- confirm against HugoOperations
+ goto err;
+
+ if(hugoOps.execute_NoCommit(pNdb) != 0) // executed without commit; the transaction is rolled back further down
+ goto err;
+
+ int node1 = hugoOps.getTransaction()->getConnectedNodeId(); // node id reported for the open transaction
+ int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand()); // node that is waited on and restarted below
+
+ if (node1 == -1 || node2 == -1) // NOTE(review): this exit returns NDBT_OK without calling closeTransaction()
+ break;
+
+ if (loop & 1) // odd iterations only
+ {
+ if (restarter.insertErrorInNode(node1, 8050)) // 8050: "Send ZABORT_TIMEOUT_BREAK delayed" (ERROR_codes.txt)
+ goto err;
+ }
+
+ if (restarter.insertErrorInNode(node2, 5003)) // effect of 5003 is not shown here; node2 is expected to reach no-start state below
+ goto err;
+
+ int res= hugoOps.execute_Rollback(pNdb); // NOTE(review): result is stored but never checked
+
+ if (restarter.waitNodesNoStart(&node2, 1) != 0) // wait on node2 only (array of one node id)
+ goto err;
+
+ if (restarter.insertErrorInAllNodes(0)) // presumably clears the inserted error codes -- confirm against NdbRestarter
+ goto err;
+
+ if (restarter.startNodes(&node2, 1) != 0) // start node2 again
+ goto err;
+
+ if (restarter.waitClusterStarted() != 0)
+ goto err;
+
+ if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0) // scan update after the restart; any failure fails the test
+ goto err;
+
+ hugoOps.closeTransaction(pNdb);
+
+ } while(++loop < 5); // at most five iterations
+
+ return NDBT_OK;
+
+err: // shared failure exit: close the transaction and report failure
+ hugoOps.closeTransaction(pNdb);
+ return NDBT_FAILED;
+}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
@@ -870,6 +937,12 @@ TESTCASE("Bug16772",
"Test bug with restarting before NF handling is complete"){
STEP(runBug16772);
}
+TESTCASE("Bug18414",
+ "Test bug with NF during NR"){ // test case entry for the bug#18414 regression test
+ INITIALIZER(runLoadTable);
+ STEP(runBug18414); // the only step in this test case
+ FINALIZER(runClearTable);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
diff --git a/ndb/test/ndbapi/testTimeout.cpp b/ndb/test/ndbapi/testTimeout.cpp
index 25392698642..957fcd1d1e7 100644
--- a/ndb/test/ndbapi/testTimeout.cpp
+++ b/ndb/test/ndbapi/testTimeout.cpp
@@ -173,8 +173,11 @@ int runTimeoutTrans(NDBT_Context* ctx, NDBT_Step* step){
NdbSleep_MilliSleep(sleep);
// Expect that transaction has timed-out
- CHECK(hugoOps.execute_Commit(pNdb) == 237);
-
+ int ret = hugoOps.execute_Commit(pNdb);
+ CHECK(ret != 0);
+ NdbError err = pNdb->getNdbError(ret);
+ CHECK(err.classification == NdbError::TimeoutExpired);
+
} while(false);
hugoOps.closeTransaction(pNdb);
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 0533d585a41..b11e4479a57 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -458,6 +458,10 @@ max-time: 500
cmd: testSystemRestart
args: -n Bug18385 T1
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug18414 T1
+
# OLD FLEX
max-time: 500
cmd: flexBench