summaryrefslogtreecommitdiff
path: root/ndb
diff options
context:
space:
mode:
authorunknown <jonas@perch.ndb.mysql.com>2005-12-13 11:54:42 +0100
committerunknown <jonas@perch.ndb.mysql.com>2005-12-13 11:54:42 +0100
commit01c7bd20b9fc28530387eeb5e3165f9ad8535301 (patch)
tree7e3d3ff692b611a80ae1078b5a69f09f9f5ded6d /ndb
parent374dd037683065d8c7701ead56cb2eb9260812b1 (diff)
parentfd80fa2d4c5c45c64f16283c61e6926eea3f3af1 (diff)
downloadmariadb-git-01c7bd20b9fc28530387eeb5e3165f9ad8535301.tar.gz
Merge perch.ndb.mysql.com:/home/jonas/src/mysql-4.1
into perch.ndb.mysql.com:/home/jonas/src/mysql-5.0 ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Auto merged ndb/src/kernel/blocks/dblqh/Dblqh.hpp: Auto merged ndb/src/kernel/blocks/dbtc/DbtcMain.cpp: Auto merged ndb/test/ndbapi/testNodeRestart.cpp: Auto merged ndb/src/kernel/blocks/ERROR_codes.txt: merge ndb/src/kernel/blocks/dblqh/DblqhMain.cpp: merge ndb/test/run-test/daily-basic-tests.txt: merge
Diffstat (limited to 'ndb')
-rw-r--r--ndb/include/kernel/signaldata/DumpStateOrd.hpp2
-rw-r--r--ndb/include/ndb_version.h.in3
-rw-r--r--ndb/src/kernel/blocks/ERROR_codes.txt9
-rw-r--r--ndb/src/kernel/blocks/dbdih/DbdihMain.cpp52
-rw-r--r--ndb/src/kernel/blocks/dblqh/Dblqh.hpp1
-rw-r--r--ndb/src/kernel/blocks/dblqh/DblqhMain.cpp18
-rw-r--r--ndb/src/kernel/blocks/dbtc/DbtcMain.cpp3
-rw-r--r--ndb/test/ndbapi/testNodeRestart.cpp149
-rw-r--r--ndb/test/run-test/daily-basic-tests.txt21
9 files changed, 250 insertions, 8 deletions
diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
index bde690e056d..4dd22cf5092 100644
--- a/ndb/include/kernel/signaldata/DumpStateOrd.hpp
+++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
@@ -78,6 +78,8 @@ public:
LqhDumpAllScanRec = 2301,
LqhDumpAllActiveScanRec = 2302,
LqhDumpLcpState = 2303,
+ LqhErrorInsert5042 = 2315,
+
AccDumpOneScanRec = 2400,
AccDumpAllScanRec = 2401,
AccDumpAllActiveScanRec = 2402,
diff --git a/ndb/include/ndb_version.h.in b/ndb/include/ndb_version.h.in
index 826f5124407..38b72306d03 100644
--- a/ndb/include/ndb_version.h.in
+++ b/ndb/include/ndb_version.h.in
@@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
*/
/*#define NDB_VERSION_ID 0*/
+#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
+#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
+
#endif
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index 1a72537a77e..0be5e91cd71 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ.
5007:
Delay GCP_SAVEREQ by 10 secs
+7165: Delay INCL_NODE_REQ in starting node yielding error in GCP_PREPARE
+
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
@@ -155,11 +157,15 @@ Insert node failure handling when receiving COMPLETEREQ.
5006:
Insert node failure handling when receiving ABORTREQ.
+5042:
+As 5002, but with specified table (see DumpStateOrd)
+
These error codes can be combined with error codes for testing time-out
handling in DBTC to ensure that node failures are also well handled in
time-out handling. They can also be used to test multiple node failure
handling.
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH
-------------------------------------------------
5011:
@@ -198,6 +204,9 @@ Delay execution of ABORTREQ signal 2 seconds to generate time-out.
8050: Send TCKEYREF if operation is non local
+5100,5101: Drop ABORT req in primary replica
+ Crash on "next" ABORT
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
8040:
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index 29de0368212..c1e90dccf12 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId)
signal->theData[2] = c_nodeStartMaster.failNr;
signal->theData[3] = 0;
signal->theData[4] = currentgcp;
- sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB);
+ sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
}//Dbdih::sendINCL_NODEREQ()
void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
@@ -1863,6 +1863,14 @@ void Dbdih::gcpBlockedLab(Signal* signal)
// global checkpoint id and the correct state. We do not wait for any reply
// since the starting node will not send any.
/*-------------------------------------------------------------------------*/
+ Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
+
+ if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+ (getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5))
+ {
+ c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
+ }
+
sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
}//Dbdih::gcpBlockedLab()
@@ -2065,6 +2073,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
jamEntry();
Uint32 retRef = signal->theData[0];
Uint32 nodeId = signal->theData[1];
+ if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
+ return;
+ }
+
Uint32 tnodeStartFailNr = signal->theData[2];
currentgcp = signal->theData[4];
CRASH_INSERTION(7127);
@@ -2092,6 +2107,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
// id's and the lcp status.
/*-----------------------------------------------------------------------*/
CRASH_INSERTION(7171);
+ Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
+
+ if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+ (NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5))
+ {
+ signal->theData[0] = getOwnNodeId();
+ signal->theData[1] = getOwnNodeId();
+ sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
+ }
return;
}//if
if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
@@ -3741,8 +3765,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
/*------------------------------------------------------------------------*/
// Verify that a starting node has also crashed. Reset the node start record.
/*-------------------------------------------------------------------------*/
- if (c_nodeStartMaster.startNode != RNIL) {
- ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE);
+ if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
+ {
+ BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
+ SystemError * const sysErr = (SystemError*)&signal->theData[0];
+ sysErr->errorCode = SystemError::StartInProgressError;
+ sysErr->errorRef = reference();
+ sysErr->data1= 0;
+ sysErr->data2= __LINE__;
+ sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
+ nodeResetStart();
}//if
/*--------------------------------------------------*/
@@ -5189,15 +5221,16 @@ void Dbdih::removeNodeFromTable(Signal* signal,
/**
* For each of replica record
*/
- Uint32 replicaNo = 0;
+ bool found = false;
ReplicaRecordPtr replicaPtr;
for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
- replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) {
+ replicaPtr.i = replicaPtr.p->nextReplica) {
jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
if(replicaPtr.p->procNode == nodeId){
jam();
+ found = true;
noOfRemovedReplicas++;
removeNodeFromStored(nodeId, fragPtr, replicaPtr);
if(replicaPtr.p->lcpOngoingFlag){
@@ -5213,6 +5246,15 @@ void Dbdih::removeNodeFromTable(Signal* signal,
}
}
}
+ if (!found)
+ {
+ jam();
+ /**
+ * Run updateNodeInfo to remove any dead nodes from list of activeNodes
+ * see bug#15587
+ */
+ updateNodeInfo(fragPtr);
+ }
noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
}
diff --git a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
index 94a40adcd4a..1ed383853ba 100644
--- a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
+++ b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
@@ -2885,6 +2885,7 @@ private:
UintR ctransidHash[1024];
Uint32 c_diskless;
+ Uint32 c_error_insert_table_id;
public:
/**
diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
index 2170f890f35..2e50c0f8f73 100644
--- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
+++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
@@ -3543,6 +3543,7 @@ void Dblqh::execLQHKEYREQ(Signal* signal)
jam();
regTcPtr->activeCreat = ZTRUE;
CRASH_INSERTION(5002);
+ CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id);
} else {
regTcPtr->activeCreat = ZFALSE;
}//if
@@ -5880,12 +5881,21 @@ void Dblqh::execABORT(Signal* signal)
warningReport(signal, 8);
return;
}//if
+
+ TcConnectionrec * const regTcPtr = tcConnectptr.p;
+
+ if (ERROR_INSERTED(5100))
+ {
+ SET_ERROR_INSERT_VALUE(5101);
+ return;
+ }
+ CRASH_INSERTION2(5101, regTcPtr->nextReplica != ZNIL);
+
/* ------------------------------------------------------------------------- */
/*A GUIDING DESIGN PRINCIPLE IN HANDLING THESE ERROR SITUATIONS HAVE BEEN */
/*KEEP IT SIMPLE. THUS WE RATHER INSERT A WAIT AND SET THE ABORT_STATE TO */
/*ACTIVE RATHER THAN WRITE NEW CODE TO HANDLE EVERY SPECIAL SITUATION. */
/* ------------------------------------------------------------------------- */
- TcConnectionrec * const regTcPtr = tcConnectptr.p;
if (regTcPtr->nextReplica != ZNIL) {
/* ------------------------------------------------------------------------- */
// We will immediately send the ABORT message also to the next LQH node in line.
@@ -18522,6 +18532,12 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
}
}
+
+ if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2)
+ {
+ c_error_insert_table_id = dumpState->args[1];
+ SET_ERROR_INSERT_VALUE(5042);
+ }
}//Dblqh::execDUMP_STATE_ORD()
void Dblqh::execSET_VAR_REQ(Signal* signal)
diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
index f2646fd4176..d88ffae1d85 100644
--- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
+++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
@@ -6183,7 +6183,6 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
<< " - place: " << c_apiConTimer_line[apiConnectptr.i]);
switch (apiConnectptr.p->apiConnectstate) {
case CS_STARTED:
- ndbrequire(c_apiConTimer_line[apiConnectptr.i] != 3615);
if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
jam();
/*
@@ -6443,8 +6442,8 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
warningEvent(buf);
ndbout_c(buf);
ndbrequire(false);
+ releaseAbortResources(signal);
}
- releaseAbortResources(signal);
return;
}//if
TloopCount++;
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index 6ef3da2d760..92d6c1830ef 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -21,6 +21,7 @@
#include <NdbRestarter.hpp>
#include <NdbRestarts.hpp>
#include <Vector.hpp>
+#include <signaldata/DumpStateOrd.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@@ -409,6 +410,132 @@ int runLateCommit(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
+/**
+ * Regression test step for bug#15587.
+ *
+ * Restarts the data node returned by restarter.getDbNodeId(1) into the
+ * "no start" state, sends it the DumpStateOrd::LqhErrorInsert5042 dump
+ * command carrying the id of the table under test, then starts the node
+ * and waits until it reports started.
+ *
+ * @param ctx  test context; supplies the table and is told to stop the test
+ * @param step unused
+ * @return NDBT_OK if every restarter call succeeded, NDBT_FAILED otherwise
+ */
+int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+
+  // dump[0] selects the dump command, dump[1] is the table id that DBLQH
+  // records before it arms error insert 5042.
+  Uint32 tableId = ctx->getTab()->getTableId();
+  int dump[2] = { DumpStateOrd::LqhErrorInsert5042, 0 };
+  dump[1] = tableId;
+
+  int nodeId = restarter.getDbNodeId(1);
+
+  ndbout << "Restart node " << nodeId << endl;
+
+  // The restarter calls report failure with a non-zero value. The && chain
+  // stops at the first failing call, so calls are issued in the same order
+  // and with the same cut-off as a sequence of early returns.
+  int result = NDBT_FAILED;
+  if (restarter.restartOneDbNode(nodeId,
+                                 /** initial */ false,
+                                 /** nostart */ true,
+                                 /** abort   */ true) == 0 &&
+      restarter.waitNodesNoStart(&nodeId, 1) == 0 &&
+      restarter.dumpStateOneNode(nodeId, dump, 2) == 0 &&
+      restarter.startNodes(&nodeId, 1) == 0 &&
+      restarter.waitNodesStarted(&nodeId, 1) == 0)
+  {
+    result = NDBT_OK;
+  }
+
+  // Stop the test on failure as well as on success.
+  // NOTE(review): the Bug15587 test case also runs an "...UntilStopped"
+  // step; presumably it keeps running until stopTest() is called - confirm.
+  ctx->stopTest();
+  return result;
+}
+
+/**
+ * Restart one data node with an error insert armed before it starts.
+ *
+ * The node is restarted (abort, not initial) into the "no start" state,
+ * errorCode is inserted in it, and it is then started and awaited.
+ *
+ * @param restarter restarter used to control the cluster
+ * @param nodeId    data node to restart
+ * @param errorCode error insert code to arm in the node before it starts
+ * @return 0 if every restarter call succeeded, -1 at the first failure
+ */
+static int restartNodeWithErrorInsert(NdbRestarter& restarter,
+                                      int nodeId, int errorCode){
+  if (restarter.restartOneDbNode(nodeId,
+                                 /** initial */ false,
+                                 /** nostart */ true,
+                                 /** abort   */ true))
+    return -1;
+
+  if (restarter.waitNodesNoStart(&nodeId, 1))
+    return -1;
+
+  if (restarter.insertErrorInNode(nodeId, errorCode))
+    return -1;
+
+  if (restarter.startNodes(&nodeId, 1))
+    return -1;
+
+  if (restarter.waitNodesStarted(&nodeId, 1))
+    return -1;
+
+  return 0;
+}
+
+/**
+ * Regression test step for bug#15632.
+ *
+ * Restarts the data node returned by restarter.getDbNodeId(1) twice:
+ * first with error insert 7165 (delays INCL_NODEREQ in the starting node),
+ * then with error insert 7171 (a crash insertion in execINCL_NODEREQ).
+ * Each time it waits until the node reports started.
+ *
+ * @param ctx  test context; told to stop the test before returning
+ * @param step unused
+ * @return NDBT_OK if both restarts succeeded, NDBT_FAILED otherwise
+ */
+int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+
+  int nodeId = restarter.getDbNodeId(1);
+
+  ndbout << "Restart node " << nodeId << endl;
+
+  // The second restart is attempted only if the first one succeeded.
+  int result = NDBT_FAILED;
+  if (restartNodeWithErrorInsert(restarter, nodeId, 7165) == 0 &&
+      restartNodeWithErrorInsert(restarter, nodeId, 7171) == 0)
+  {
+    result = NDBT_OK;
+  }
+
+  // Stop the test on every exit path, not only on success.
+  ctx->stopTest();
+  return result;
+}
+
+/**
+ * Regression test step for bug#15685 (node failure during abort).
+ *
+ * Loads 10 rows, updates one row inside an uncommitted transaction, arms
+ * error insert 5100 in all nodes and rolls the transaction back. Per the
+ * DBLQH change in this commit, 5100 drops the ABORT signal and arms 5101,
+ * which crashes on a later ABORT when a next replica exists. The step then
+ * waits for the cluster to be started again and clears the error inserts.
+ *
+ * @param ctx  test context; supplies the table and is told to stop the test
+ * @param step test step; supplies the Ndb object
+ * @return NDBT_OK on success, NDBT_FAILED if any checked call failed
+ */
+int runBug15685(NDBT_Context* ctx, NDBT_Step* step){
+
+  Ndb* pNdb = GETNDB(step);
+  HugoOperations hugoOps(*ctx->getTab());
+  NdbRestarter restarter;
+
+  // All objects are declared before the first "goto err", so no jump
+  // crosses an initialization.
+  HugoTransactions hugoTrans(*ctx->getTab());
+  if (hugoTrans.loadTable(GETNDB(step), 10) != 0)
+    goto err;
+
+  if(hugoOps.startTransaction(pNdb) != 0)
+    goto err;
+
+  if(hugoOps.pkUpdateRecord(pNdb, 0, 1, rand()) != 0)
+    goto err;
+
+  if(hugoOps.execute_NoCommit(pNdb) != 0)
+    goto err;
+
+  if (restarter.insertErrorInAllNodes(5100))
+    goto err;
+
+  // The rollback result is not checked.
+  // NOTE(review): presumably the rollback is expected to fail or time out
+  // because the ABORT is dropped by error insert 5100 - confirm.
+  hugoOps.execute_Rollback(pNdb);
+
+  if (restarter.waitClusterStarted() != 0)
+    goto err;
+
+  // Error code 0 clears the error inserts again.
+  if (restarter.insertErrorInAllNodes(0))
+    goto err;
+
+  ctx->stopTest();
+  return NDBT_OK;
+
+err:
+  // Single failure exit: every failing call stops the test before returning.
+  ctx->stopTest();
+  return NDBT_FAILED;
+}
+
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -558,6 +685,8 @@ TESTCASE("RestartNFDuringNR",
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
+ STEP(runPkUpdateUntilStopped);
+ STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
@@ -647,6 +776,8 @@ TESTCASE("RestartNodeDuringLCP",
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
+ STEP(runPkUpdateUntilStopped);
+ STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
@@ -671,6 +802,24 @@ TESTCASE("LateCommit",
STEP(runLateCommit);
FINALIZER(runClearTable);
}
+TESTCASE("Bug15587", // regression test for bug#15587; the work is done in runBug15587
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runScanUpdateUntilStopped); // NOTE(review): presumably runs beside runBug15587 until it calls ctx->stopTest() - confirm
+ STEP(runBug15587); // restarts one node with error insert 5042 armed for the test table
+ FINALIZER(runClearTable);
+}
+TESTCASE("Bug15632", // regression test for bug#15632; no load step runs beside it
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runBug15632); // restarts one node twice, with error inserts 7165 and then 7171
+ FINALIZER(runClearTable);
+}
+TESTCASE("Bug15685", // regression test for bug#15685; has no INITIALIZER
+ "Test bug with NF during abort"){
+ STEP(runBug15685); // loads its own 10 rows, then rolls back with error insert 5100 armed
+ FINALIZER(runClearTable);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 8b44594a9b5..59f51044b51 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -413,6 +413,27 @@ max-time: 500
cmd: testScan
args: -n ScanParallelism
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15587 T1
+
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15632 T1
+
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15685 T1
+
+# OLD FLEX
+max-time: 500
+cmd: flexBench
+args: -c 25 -t 10
+
+max-time: 500
+cmd: flexHammer
+args: -r 5 -t 32
+
#
# DICT TESTS
max-time: 1500