diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2005-12-09 13:51:12 +0100 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2005-12-09 13:51:12 +0100 |
commit | 5b10b7bc535241fa48370be6bd2008fc21efa323 (patch) | |
tree | 248bd78661d1ee74b2fb6a848f1ffa302bc030c8 /ndb | |
parent | fc4c198ef01f401bf9d88171ef794dd4868cefd5 (diff) | |
download | mariadb-git-5b10b7bc535241fa48370be6bd2008fc21efa323.tar.gz |
bug#15632 - ndb
Fix race between INCL_NODEREQ (prio B) and GCP_PREPARE (prio A) by having the master also wait for the starting node to confirm INCL_NODEREQ.
ndb/include/ndb_version.h.in:
Add version constants (4.1.17 / 5.0.18) so the bug fix can be handled across upgrades
ndb/src/kernel/blocks/ERROR_codes.txt:
New error code (7165) for delaying INCL_NODEREQ
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
Fix race between INCL_NODEREQ (prio B) and GCP_PREPARE (prio A)
by having the master also wait for the starting node to confirm INCL_NODEREQ
ndb/test/ndbapi/testNodeRestart.cpp:
Add testcase for bug#15632
Diffstat (limited to 'ndb')
-rw-r--r-- | ndb/include/ndb_version.h.in | 3 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/ERROR_codes.txt | 2 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 38 | ||||
-rw-r--r-- | ndb/test/ndbapi/testNodeRestart.cpp | 60 |
4 files changed, 100 insertions, 3 deletions
diff --git a/ndb/include/ndb_version.h.in b/ndb/include/ndb_version.h.in index 826f5124407..38b72306d03 100644 --- a/ndb/include/ndb_version.h.in +++ b/ndb/include/ndb_version.h.in @@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; */ /*#define NDB_VERSION_ID 0*/ +#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) +#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) + #endif diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index 66d52528f8d..e11c5ef4c5d 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ. 5007: Delay GCP_SAVEREQ by 10 secs +7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE + ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 76c465148d0..97cd8c374c6 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId) signal->theData[2] = c_nodeStartMaster.failNr; signal->theData[3] = 0; signal->theData[4] = currentgcp; - sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB); + sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA); }//Dbdih::sendINCL_NODEREQ() void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId) @@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal) // global checkpoint id and the correct state. We do not wait for any reply // since the starting node will not send any. 
/*-------------------------------------------------------------------------*/ + Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version; + + if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) || + (getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5)) + { + c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode); + } + sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode); }//Dbdih::gcpBlockedLab() @@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) jamEntry(); Uint32 retRef = signal->theData[0]; Uint32 nodeId = signal->theData[1]; + if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165)) + { + CLEAR_ERROR_INSERT_VALUE; + sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength()); + return; + } + Uint32 tnodeStartFailNr = signal->theData[2]; currentgcp = signal->theData[4]; CRASH_INSERTION(7127); @@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) // id's and the lcp status. /*-----------------------------------------------------------------------*/ CRASH_INSERTION(7171); + Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version; + + if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) || + (NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5)) + { + signal->theData[0] = getOwnNodeId(); + signal->theData[1] = getOwnNodeId(); + sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB); + } return; }//if if (getNodeStatus(nodeId) != NodeRecord::STARTING) { @@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal) /*------------------------------------------------------------------------*/ // Verify that a starting node has also crashed. Reset the node start record. 
/*-------------------------------------------------------------------------*/ - if (c_nodeStartMaster.startNode != RNIL) { - ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE); + if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE) + { + BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode); + SystemError * const sysErr = (SystemError*)&signal->theData[0]; + sysErr->errorCode = SystemError::StartInProgressError; + sysErr->errorRef = reference(); + sysErr->data1= 0; + sysErr->data2= __LINE__; + sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA); + nodeResetStart(); }//if /*--------------------------------------------------*/ diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index 41e07267e1b..5daf1fcfea0 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -446,6 +446,56 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){ return NDBT_OK; } +int runBug15632(NDBT_Context* ctx, NDBT_Step* step){ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter restarter; + + int nodeId = restarter.getDbNodeId(1); + + ndbout << "Restart node " << nodeId << endl; + + if (restarter.restartOneDbNode(nodeId, + /** initial */ false, + /** nostart */ true, + /** abort */ true)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.insertErrorInNode(nodeId, 7165)) + return NDBT_FAILED; + + if (restarter.startNodes(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.waitNodesStarted(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.restartOneDbNode(nodeId, + /** initial */ false, + /** nostart */ true, + /** abort */ true)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.insertErrorInNode(nodeId, 
7171)) + return NDBT_FAILED; + + if (restarter.startNodes(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.waitNodesStarted(&nodeId, 1)) + return NDBT_FAILED; + + ctx->stopTest(); + return NDBT_OK; +} + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -596,6 +646,8 @@ TESTCASE("RestartNFDuringNR", INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); + STEP(runPkUpdateUntilStopped); + STEP(runScanUpdateUntilStopped); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } @@ -685,6 +737,8 @@ TESTCASE("RestartNodeDuringLCP", INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); + STEP(runPkUpdateUntilStopped); + STEP(runScanUpdateUntilStopped); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } @@ -716,6 +770,12 @@ TESTCASE("Bug15587", STEP(runBug15587); FINALIZER(runClearTable); } +TESTCASE("Bug15632", + "Test bug with NF during NR"){ + INITIALIZER(runLoadTable); + STEP(runBug15632); + FINALIZER(runClearTable); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ |