diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2007-02-17 23:52:17 +0100 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2007-02-17 23:52:17 +0100 |
commit | 778b4aad59994e166f91c50b47d686a53a5e8b80 (patch) | |
tree | 17eaa4ec6effda4798e5aac5a50f4f139c262f0b | |
parent | 0e39133ad73e7ec7e821591d912509054174d697 (diff) | |
download | mariadb-git-778b4aad59994e166f91c50b47d686a53a5e8b80.tar.gz |
ndb - bug#26457
Handle failure of the new master during master take-over
ndb/src/kernel/blocks/ERROR_codes.txt:
Add new error code 7180 (crash master during master take-over in execMASTER_LCPCONF)
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
Make sure to clear the NF_LCP_TAKE_OVER node-failure step of the previously failed node if the master fails during master take-over
ndb/test/include/NdbRestarter.hpp:
Add support for querying next master and node group
(for multi node failure testing)
ndb/test/ndbapi/testNodeRestart.cpp:
Add testcase Bug26457 (runBug26457)
ndb/test/run-test/daily-basic-tests.txt:
Add testcase Bug26457 to the daily basic tests
ndb/test/src/NdbRestarter.cpp:
Add support for querying next master and node group
(for multi node failure testing)
-rw-r--r-- | ndb/src/kernel/blocks/ERROR_codes.txt | 4 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 27 | ||||
-rw-r--r-- | ndb/test/include/NdbRestarter.hpp | 2 | ||||
-rw-r--r-- | ndb/test/ndbapi/testNodeRestart.cpp | 42 | ||||
-rw-r--r-- | ndb/test/run-test/daily-basic-tests.txt | 4 | ||||
-rw-r--r-- | ndb/test/src/NdbRestarter.cpp | 62 |
6 files changed, 139 insertions, 2 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index 0bcc99a6334..f7cb49014cb 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4014 Next DBLQH 5043 Next DBDICT 6007 -Next DBDIH 7178 +Next DBDIH 7181 Next DBTC 8039 Next CMVMI 9000 Next BACKUP 10022 @@ -71,6 +71,8 @@ Delay GCP_SAVEREQ by 10 secs 7177: Delay copying of sysfileData in execCOPY_GCIREQ +7180: Crash master during master-take-over in execMASTER_LCPCONF + ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 5f573d40dfe..0e6fe4714b6 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -4612,6 +4612,8 @@ void Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ jam(); + Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId; + c_lcpMasterTakeOverState.minTableId = ~0; c_lcpMasterTakeOverState.minFragId = ~0; c_lcpMasterTakeOverState.failedNodeId = nodeId; @@ -4630,7 +4632,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ /** * Node failure during master take over... 
*/ - ndbout_c("Nodefail during master take over"); + ndbout_c("Nodefail during master take over (old: %d)", oldNode); + } + + NodeRecordPtr nodePtr; + nodePtr.i = oldNode; + if (oldNode > 0 && oldNode < MAX_NDB_NODES) + { + jam(); + ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord); + if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER)) + { + jam(); + checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER); + } } setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER); @@ -5646,6 +5661,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal) jamEntry(); const BlockReference newMasterBlockref = req->masterRef; + if (newMasterBlockref != cmasterdihref) + { + jam(); + ndbout_c("resending GSN_MASTER_LCPREQ"); + sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal, + signal->getLength(), 50); + return; + } Uint32 failedNodeId = req->failedNodeId; /** @@ -5946,6 +5969,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord); nodePtr.p->lcpStateAtTakeOver = lcpState; + CRASH_INSERTION(7180); + #ifdef VM_TRACE ndbout_c("MASTER_LCPCONF"); printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0); diff --git a/ndb/test/include/NdbRestarter.hpp b/ndb/test/include/NdbRestarter.hpp index 3ec92ae786e..a2e6a4b3248 100644 --- a/ndb/test/include/NdbRestarter.hpp +++ b/ndb/test/include/NdbRestarter.hpp @@ -62,6 +62,8 @@ public: int dumpStateAllNodes(int * _args, int _num_args); int getMasterNodeId(); + int getNextMasterNodeId(int nodeId); + int getNodeGroup(int nodeId); int getRandomNodeSameNodeGroup(int nodeId, int randomNumber); int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber); int getRandomNotMasterNodeId(int randomNumber); diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index 43fb77342b5..e729e8179b5 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -1045,6 +1045,45 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){ return 
NDBT_OK; } +int +runBug26457(NDBT_Context* ctx, NDBT_Step* step) +{ + NdbRestarter res; + if (res.getNumDbNodes() < 4) + return NDBT_OK; + + int loops = ctx->getNumLoops(); + while (loops --) + { +retry: + int master = res.getMasterNodeId(); + int next = res.getNextMasterNodeId(master); + + ndbout_c("master: %d next: %d", master, next); + + if (res.getNodeGroup(master) == res.getNodeGroup(next)) + { + res.restartOneDbNode(next, false, false, true); + if (res.waitClusterStarted()) + return NDBT_FAILED; + goto retry; + } + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 }; + + if (res.dumpStateOneNode(next, val2, 2)) + return NDBT_FAILED; + + if (res.insertErrorInNode(next, 7180)) + return NDBT_FAILED; + + res.restartOneDbNode(master, false, false, true); + if (res.waitClusterStarted()) + return NDBT_FAILED; + } + + return NDBT_OK; +} NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -1367,6 +1406,9 @@ TESTCASE("Bug25364", ""){ TESTCASE("Bug25554", ""){ INITIALIZER(runBug25554); } +TESTCASE("Bug26457", ""){ + INITIALIZER(runBug26457); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 00fa14eea2c..9074ff145d8 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -477,6 +477,10 @@ max-time: 1000 cmd: testNodeRestart args: -n Bug25554 T1 +max-time: 1000 +cmd: testNodeRestart +args: -n Bug26457 T1 + # OLD FLEX max-time: 500 cmd: flexBench diff --git a/ndb/test/src/NdbRestarter.cpp b/ndb/test/src/NdbRestarter.cpp index 2c16a05240d..4c7f52a8622 100644 --- a/ndb/test/src/NdbRestarter.cpp +++ b/ndb/test/src/NdbRestarter.cpp @@ -129,6 +129,68 @@ NdbRestarter::getMasterNodeId(){ } int +NdbRestarter::getNodeGroup(int nodeId){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + for(size_t i = 0; i < ndbNodes.size(); i++) + { + if(ndbNodes[i].node_id == 
nodeId) + { + return ndbNodes[i].node_group; + } + } + + return -1; +} + +int +NdbRestarter::getNextMasterNodeId(int nodeId){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + size_t i; + for(i = 0; i < ndbNodes.size(); i++) + { + if(ndbNodes[i].node_id == nodeId) + { + break; + } + } + assert(i < ndbNodes.size()); + if (i == ndbNodes.size()) + return -1; + + int dynid = ndbNodes[i].dynamic_id; + int minid = dynid; + for (i = 0; i<ndbNodes.size(); i++) + if (ndbNodes[i].dynamic_id > minid) + minid = ndbNodes[i].dynamic_id; + + for (i = 0; i<ndbNodes.size(); i++) + if (ndbNodes[i].dynamic_id > dynid && + ndbNodes[i].dynamic_id < minid) + { + minid = ndbNodes[i].dynamic_id; + } + + if (minid != ~0) + { + for (i = 0; i<ndbNodes.size(); i++) + if (ndbNodes[i].dynamic_id == minid) + return ndbNodes[i].node_id; + } + + return getMasterNodeId(); +} + +int NdbRestarter::getRandomNotMasterNodeId(int rand){ int master = getMasterNodeId(); if(master == -1) |