diff options
author | jonas@perch.ndb.mysql.com <> | 2007-03-13 11:29:14 +0100 |
---|---|---|
committer | jonas@perch.ndb.mysql.com <> | 2007-03-13 11:29:14 +0100 |
commit | e2853bddbc8072ad39075618e91ea82ec3fa36b1 (patch) | |
tree | 616e18ec612f2d39bda2aa31e4aecac2c6cf63b3 | |
parent | 9370d6a3ce9bf94fc4bde2e9c0593e9a77c5d85b (diff) | |
download | mariadb-git-e2853bddbc8072ad39075618e91ea82ec3fa36b1.tar.gz |
ndb - bug#27003
Handle random(not in order) LQHKEYREQ failures during node-restart
-rw-r--r-- | ndb/src/kernel/blocks/ERROR_codes.txt | 12 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 19 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp | 24 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbtup/DbtupGen.cpp | 2 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 1 | ||||
-rw-r--r-- | ndb/test/ndbapi/testNodeRestart.cpp | 56 | ||||
-rw-r--r-- | ndb/test/run-test/daily-basic-tests.txt | 4 |
7 files changed, 114 insertions, 4 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index f7cb49014cb..ed35db91738 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -489,3 +489,15 @@ Dbdict: 6003 Crash in participant @ CreateTabReq::Prepare 6004 Crash in participant @ CreateTabReq::Commit 6005 Crash in participant @ CreateTabReq::CreateDrop + +TUP: +---- + +4025: Fail all inserts with out of memory +4026: Fail one insert with oom +4027: Fail inserts randomly with oom +4028: Fail one random insert with oom + +NDBCNTR: + +1000: Crash insertion on SystemError::CopyFragRef diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 698e5ac292c..5847e1063aa 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -9641,6 +9641,15 @@ void Dblqh::copyCompletedLab(Signal* signal) closeCopyLab(signal); return; }//if + + if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY && + scanptr.p->scanErrorCounter) + { + jam(); + closeCopyLab(signal); + return; + } + if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY) { jam(); /*---------------------------------------------------------------------------*/ @@ -9717,13 +9726,16 @@ void Dblqh::continueCopyAfterBlockedLab(Signal* signal) void Dblqh::copyLqhKeyRefLab(Signal* signal) { ndbrequire(tcConnectptr.p->transid[1] == signal->theData[4]); - tcConnectptr.p->copyCountWords -= signal->theData[3]; + Uint32 copyWords = signal->theData[3]; scanptr.i = tcConnectptr.p->tcScanRec; c_scanRecordPool.getPtr(scanptr); scanptr.p->scanErrorCounter++; tcConnectptr.p->errorCode = terrorCode; - closeCopyLab(signal); - return; + + LqhKeyConf* conf = (LqhKeyConf*)signal->getDataPtrSend(); + conf->transId1 = copyWords; + conf->transId2 = tcConnectptr.p->transid[1]; + copyCompletedLab(signal); }//Dblqh::copyLqhKeyRefLab() void Dblqh::closeCopyLab(Signal* signal) @@ -9734,6 +9746,7 @@ void Dblqh::closeCopyLab(Signal* signal) // Wait until all of those have arrived until we start the // close process. /*---------------------------------------------------------------------------*/ + scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY; jam(); return; }//if diff --git a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp index 9917ac4e340..15ce54e594c 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp @@ -213,6 +213,30 @@ void Dbtup::execTUP_ALLOCREQ(Signal* signal) //--------------------------------------------------- PagePtr pagePtr; Uint32 pageOffset; + + if (ERROR_INSERTED(4025)) + { + signal->theData[0] = 827; + return; + } + if (ERROR_INSERTED(4026)) + { + CLEAR_ERROR_INSERT_VALUE; + signal->theData[0] = 827; + return; + } + if (ERROR_INSERTED(4027) && (rand() % 100) > 25) + { + signal->theData[0] = 827; + return; + } + if (ERROR_INSERTED(4028) && (rand() % 100) > 25) + { + CLEAR_ERROR_INSERT_VALUE; + signal->theData[0] = 827; + return; + } + if (!allocTh(regFragPtr.p, regTabPtr.p, NORMAL_PAGE, diff --git a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp index 6b1056fdeac..f5b4e1fb944 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp @@ -66,6 +66,7 @@ void Dbtup::initData() undoPage = 0; totNoOfPagesAllocated = 0; cnoOfAllocatedPages = 0; + CLEAR_ERROR_INSERT_VALUE; // Records with constant sizes }//Dbtup::initData() @@ -570,7 +571,6 @@ void Dbtup::execSTTOR(Signal* signal) switch (startPhase) { case ZSTARTPHASE1: ljam(); - CLEAR_ERROR_INSERT_VALUE; cownref = calcTupBlockRef(0); break; default: diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index a9039385805..c05f04d700c 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -180,6 +180,7 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal) break; case SystemError::CopyFragRefError: + CRASH_INSERTION(1000); BaseString::snprintf(buf, sizeof(buf), "Killed by node %d as " "copyfrag failed, error: %u", diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index c8c8ddd88d6..e8f8ac66f74 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -1125,6 +1125,59 @@ runBug26481(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } +int +runBug27003(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter res; + + static const int errnos[] = { 4025, 4026, 4027, 4028, 0 }; + + int node = res.getRandomNotMasterNodeId(rand()); + ndbout_c("node: %d", node); + if (res.restartOneDbNode(node, false, true, true)) + return NDBT_FAILED; + + Uint32 pos = 0; + for (Uint32 i = 0; i<loops; i++) + { + while (errnos[pos] != 0) + { + ndbout_c("Tesing err: %d", errnos[pos]); + + if (res.waitNodesNoStart(&node, 1)) + return NDBT_FAILED; + + if (res.insertErrorInNode(node, 1000)) + return NDBT_FAILED; + + if (res.insertErrorInNode(node, errnos[pos])) + return NDBT_FAILED; + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + if (res.dumpStateOneNode(node, val2, 2)) + return NDBT_FAILED; + + res.startNodes(&node, 1); + res.waitNodesStartPhase(&node, 1, 2); + pos++; + } + pos = 0; + } + + if (res.waitNodesNoStart(&node, 1)) + return NDBT_FAILED; + + res.startNodes(&node, 1); + if (res.waitClusterStarted()) + return NDBT_FAILED; + + return NDBT_OK; +} + + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ @@ -1452,6 +1505,9 @@ TESTCASE("Bug26457", ""){ TESTCASE("Bug26481", ""){ INITIALIZER(runBug26481); } +TESTCASE("Bug27003", ""){ + INITIALIZER(runBug27003); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index baad45013dd..1c4d2bd6b7b 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -425,6 +425,10 @@ max-time: 500 cmd: testScan args: -n Bug24447 T1 +max-time: 1000 +cmd: testNodeRestart +args: -n Bug27003 T1 + max-time: 500 cmd: testNodeRestart args: -n Bug15587 T1 |