summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jonas@perch.ndb.mysql.com <> 2007-03-13 11:29:14 +0100
committer: jonas@perch.ndb.mysql.com <> 2007-03-13 11:29:14 +0100
commit: e2853bddbc8072ad39075618e91ea82ec3fa36b1 (patch)
tree: 616e18ec612f2d39bda2aa31e4aecac2c6cf63b3
parent: 9370d6a3ce9bf94fc4bde2e9c0593e9a77c5d85b (diff)
download: mariadb-git-e2853bddbc8072ad39075618e91ea82ec3fa36b1.tar.gz
ndb - bug#27003
Handle random (not in order) LQHKEYREQ failures during node-restart
-rw-r--r-- ndb/src/kernel/blocks/ERROR_codes.txt 12
-rw-r--r-- ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 19
-rw-r--r-- ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp 24
-rw-r--r-- ndb/src/kernel/blocks/dbtup/DbtupGen.cpp 2
-rw-r--r-- ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp 1
-rw-r--r-- ndb/test/ndbapi/testNodeRestart.cpp 56
-rw-r--r-- ndb/test/run-test/daily-basic-tests.txt 4
7 files changed, 114 insertions, 4 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index f7cb49014cb..ed35db91738 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -489,3 +489,15 @@ Dbdict:
6003 Crash in participant @ CreateTabReq::Prepare
6004 Crash in participant @ CreateTabReq::Commit
6005 Crash in participant @ CreateTabReq::CreateDrop
+
+TUP:
+----
+
+4025: Fail all inserts with out of memory
+4026: Fail one insert with out of memory
+4027: Fail inserts randomly with out of memory
+4028: Fail one random insert with out of memory
+
+NDBCNTR:
+
+1000: Crash insertion on SystemError::CopyFragRef
diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
index 698e5ac292c..5847e1063aa 100644
--- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
+++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
@@ -9641,6 +9641,15 @@ void Dblqh::copyCompletedLab(Signal* signal)
closeCopyLab(signal);
return;
}//if
+
+ if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY &&
+ scanptr.p->scanErrorCounter)
+ {
+ jam();
+ closeCopyLab(signal);
+ return;
+ }
+
if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY) {
jam();
/*---------------------------------------------------------------------------*/
@@ -9717,13 +9726,16 @@ void Dblqh::continueCopyAfterBlockedLab(Signal* signal)
void Dblqh::copyLqhKeyRefLab(Signal* signal)
{
ndbrequire(tcConnectptr.p->transid[1] == signal->theData[4]); // word 4 of the signal must match transid[1] of the current tcConnect record
- tcConnectptr.p->copyCountWords -= signal->theData[3];
+ Uint32 copyWords = signal->theData[3]; // keep word 3 in a local; the signal's send buffer is filled in further down
scanptr.i = tcConnectptr.p->tcScanRec;
c_scanRecordPool.getPtr(scanptr);
scanptr.p->scanErrorCounter++; // record the failure; copyCompletedLab checks this counter when in WAIT_LQHKEY_COPY
tcConnectptr.p->errorCode = terrorCode;
- closeCopyLab(signal);
- return;
+
+ LqhKeyConf* conf = (LqhKeyConf*)signal->getDataPtrSend(); // NOTE(review): presumably the same storage as signal->theData - confirm
+ conf->transId1 = copyWords; // NOTE(review): word count is passed in the transId1 slot; presumably copyCompletedLab reads it from there - confirm
+ conf->transId2 = tcConnectptr.p->transid[1];
+ copyCompletedLab(signal); // continue through the completion path instead of closing the copy directly
}//Dblqh::copyLqhKeyRefLab()
void Dblqh::closeCopyLab(Signal* signal)
@@ -9734,6 +9746,7 @@ void Dblqh::closeCopyLab(Signal* signal)
// Wait until all of those have arrived until we start the
// close process.
/*---------------------------------------------------------------------------*/
+ scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
jam();
return;
}//if
diff --git a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
index 9917ac4e340..15ce54e594c 100644
--- a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
+++ b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
@@ -213,6 +213,30 @@ void Dbtup::execTUP_ALLOCREQ(Signal* signal)
//---------------------------------------------------
PagePtr pagePtr;
Uint32 pageOffset;
+
+ if (ERROR_INSERTED(4025))
+ {
+ signal->theData[0] = 827;
+ return;
+ }
+ if (ERROR_INSERTED(4026))
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ signal->theData[0] = 827;
+ return;
+ }
+ if (ERROR_INSERTED(4027) && (rand() % 100) > 25)
+ {
+ signal->theData[0] = 827;
+ return;
+ }
+ if (ERROR_INSERTED(4028) && (rand() % 100) > 25)
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ signal->theData[0] = 827;
+ return;
+ }
+
if (!allocTh(regFragPtr.p,
regTabPtr.p,
NORMAL_PAGE,
diff --git a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp
index 6b1056fdeac..f5b4e1fb944 100644
--- a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp
+++ b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp
@@ -66,6 +66,7 @@ void Dbtup::initData()
undoPage = 0;
totNoOfPagesAllocated = 0;
cnoOfAllocatedPages = 0;
+ CLEAR_ERROR_INSERT_VALUE;
// Records with constant sizes
}//Dbtup::initData()
@@ -570,7 +571,6 @@ void Dbtup::execSTTOR(Signal* signal)
switch (startPhase) {
case ZSTARTPHASE1:
ljam();
- CLEAR_ERROR_INSERT_VALUE;
cownref = calcTupBlockRef(0);
break;
default:
diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
index a9039385805..c05f04d700c 100644
--- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
+++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
@@ -180,6 +180,7 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal)
break;
case SystemError::CopyFragRefError:
+ CRASH_INSERTION(1000);
BaseString::snprintf(buf, sizeof(buf),
"Killed by node %d as "
"copyfrag failed, error: %u",
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index c8c8ddd88d6..e8f8ac66f74 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -1125,6 +1125,59 @@ runBug26481(NDBT_Context* ctx, NDBT_Step* step)
return NDBT_OK;
}
+/**
+ * Test case for bug#27003: restart one non-master data node repeatedly
+ * while TUP error inserts make inserts fail with out of memory.
+ *
+ * For every configured loop, each of the error codes 4025-4028 is inserted
+ * in turn together with error 1000 (crash insertion on
+ * SystemError::CopyFragRef), and the node is started again.  After the last
+ * loop the node is started without any error insert and the whole cluster
+ * must come up.
+ *
+ * @param ctx   test context; only the configured number of loops is read
+ * @param step  test step (not used)
+ * @return NDBT_OK on success, NDBT_FAILED as soon as a checked restarter
+ *         call reports an error
+ */
+int
+runBug27003(NDBT_Context* ctx, NDBT_Step* step)
+{
+  // Signed on purpose: a non-positive loop count simply skips the loop
+  // instead of being converted to a huge unsigned bound.
+  const int loops = ctx->getNumLoops();
+  NdbRestarter res;
+
+  // Zero-terminated list of TUP error inserts to cycle through.
+  static const int errnos[] = { 4025, 4026, 4027, 4028, 0 };
+
+  int node = res.getRandomNotMasterNodeId(rand());
+  ndbout_c("node: %d", node);
+  // NOTE(review): presumably leaves the node in "no start" state, since the
+  // loop below begins by waiting for that state - confirm flag meanings.
+  if (res.restartOneDbNode(node, false, true, true))
+    return NDBT_FAILED;
+
+  for (int i = 0; i < loops; i++)
+  {
+    for (Uint32 pos = 0; errnos[pos] != 0; pos++)
+    {
+      ndbout_c("Testing err: %d", errnos[pos]);
+
+      if (res.waitNodesNoStart(&node, 1))
+        return NDBT_FAILED;
+
+      // 1000: crash insertion on SystemError::CopyFragRef
+      if (res.insertErrorInNode(node, 1000))
+        return NDBT_FAILED;
+
+      if (res.insertErrorInNode(node, errnos[pos]))
+        return NDBT_FAILED;
+
+      // NOTE(review): presumably makes the node restart (rather than stay
+      // down) when an error insert crashes it - confirm.
+      int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+      if (res.dumpStateOneNode(node, val2, 2))
+        return NDBT_FAILED;
+
+      // Results are not checked here; the next waitNodesNoStart() call
+      // is what verifies the state of the node.
+      res.startNodes(&node, 1);
+      res.waitNodesStartPhase(&node, 1, 2);
+    }
+  }
+
+  if (res.waitNodesNoStart(&node, 1))
+    return NDBT_FAILED;
+
+  // Final start without error inserts; waitClusterStarted() is the check.
+  res.startNodes(&node, 1);
+  if (res.waitClusterStarted())
+    return NDBT_FAILED;
+
+  return NDBT_OK;
+}
+
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -1452,6 +1505,9 @@ TESTCASE("Bug26457", ""){
TESTCASE("Bug26481", ""){
INITIALIZER(runBug26481);
}
+TESTCASE("Bug27003", ""){
+ INITIALIZER(runBug27003);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index baad45013dd..1c4d2bd6b7b 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -425,6 +425,10 @@ max-time: 500
cmd: testScan
args: -n Bug24447 T1
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug27003 T1
+
max-time: 500
cmd: testNodeRestart
args: -n Bug15587 T1