summaryrefslogtreecommitdiff
path: root/ndb
diff options
context:
space:
mode:
authorunknown <jonas@perch.ndb.mysql.com>2005-12-13 11:48:26 +0100
committerunknown <jonas@perch.ndb.mysql.com>2005-12-13 11:48:26 +0100
commit3f39dd450e4d542cc68dedcb31719d33dec53126 (patch)
treeba69315fb0ae336862bbea3fdb33d6b27ec9eeeb /ndb
parent12aef413ee7615b4f2cd9391921878ec28643488 (diff)
parentfd80fa2d4c5c45c64f16283c61e6926eea3f3af1 (diff)
downloadmariadb-git-3f39dd450e4d542cc68dedcb31719d33dec53126.tar.gz
Merge perch.ndb.mysql.com:/home/jonas/src/mysql-4.1
into perch.ndb.mysql.com:/home/jonas/src/mysql-4.1-push ndb/src/ndbapi/NdbImpl.hpp: Auto merged ndb/src/ndbapi/NdbRecAttr.cpp: Auto merged ndb/src/ndbapi/ndb_cluster_connection.cpp: Auto merged
Diffstat (limited to 'ndb')
-rw-r--r--ndb/include/kernel/signaldata/DumpStateOrd.hpp2
-rw-r--r--ndb/include/ndb_version.h.in3
-rw-r--r--ndb/src/kernel/blocks/ERROR_codes.txt9
-rw-r--r--ndb/src/kernel/blocks/dbdih/DbdihMain.cpp52
-rw-r--r--ndb/src/kernel/blocks/dblqh/Dblqh.hpp1
-rw-r--r--ndb/src/kernel/blocks/dblqh/DblqhMain.cpp20
-rw-r--r--ndb/src/kernel/blocks/dbtc/DbtcMain.cpp3
-rw-r--r--ndb/test/ndbapi/testNodeRestart.cpp149
-rw-r--r--ndb/test/run-test/daily-basic-tests.txt12
9 files changed, 241 insertions, 10 deletions
diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
index bde690e056d..4dd22cf5092 100644
--- a/ndb/include/kernel/signaldata/DumpStateOrd.hpp
+++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
@@ -78,6 +78,8 @@ public:
LqhDumpAllScanRec = 2301,
LqhDumpAllActiveScanRec = 2302,
LqhDumpLcpState = 2303,
+ LqhErrorInsert5042 = 2315,
+
AccDumpOneScanRec = 2400,
AccDumpAllScanRec = 2401,
AccDumpAllActiveScanRec = 2402,
diff --git a/ndb/include/ndb_version.h.in b/ndb/include/ndb_version.h.in
index 826f5124407..38b72306d03 100644
--- a/ndb/include/ndb_version.h.in
+++ b/ndb/include/ndb_version.h.in
@@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
*/
/*#define NDB_VERSION_ID 0*/
+#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
+#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
+
#endif
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index 791df915d66..5d7c8d758fc 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ.
5007:
Delay GCP_SAVEREQ by 10 secs
+7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE
+
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
@@ -155,11 +157,15 @@ Insert node failure handling when receiving COMPLETEREQ.
5006:
Insert node failure handling when receiving ABORTREQ.
+5042:
+As 5002, but with specified table (see DumpStateOrd)
+
These error code can be combined with error codes for testing time-out
handling in DBTC to ensure that node failures are also well handled in
time-out handling. They can also be used to test multiple node failure
handling.
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH
-------------------------------------------------
5011:
@@ -196,6 +202,9 @@ Delay execution of ABORTREQ signal 2 seconds to generate time-out.
8048: Make TC not choose own node for simple/dirty read
5041: Crash is receiving simple read from other TC on different node
+5100,5101: Drop ABORT req in primary replica
+ Crash on "next" ABORT
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
8040:
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index ca066b588e7..97cd8c374c6 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId)
signal->theData[2] = c_nodeStartMaster.failNr;
signal->theData[3] = 0;
signal->theData[4] = currentgcp;
- sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB);
+ sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
}//Dbdih::sendINCL_NODEREQ()
void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
@@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal)
// global checkpoint id and the correct state. We do not wait for any reply
// since the starting node will not send any.
/*-------------------------------------------------------------------------*/
+ Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
+
+ if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+ (getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5))
+ {
+ c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
+ }
+
sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
}//Dbdih::gcpBlockedLab()
@@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
jamEntry();
Uint32 retRef = signal->theData[0];
Uint32 nodeId = signal->theData[1];
+ if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
+ return;
+ }
+
Uint32 tnodeStartFailNr = signal->theData[2];
currentgcp = signal->theData[4];
CRASH_INSERTION(7127);
@@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
// id's and the lcp status.
/*-----------------------------------------------------------------------*/
CRASH_INSERTION(7171);
+ Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
+
+ if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+ (NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5))
+ {
+ signal->theData[0] = getOwnNodeId();
+ signal->theData[1] = getOwnNodeId();
+ sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
+ }
return;
}//if
if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
@@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
/*------------------------------------------------------------------------*/
// Verify that a starting node has also crashed. Reset the node start record.
/*-------------------------------------------------------------------------*/
- if (c_nodeStartMaster.startNode != RNIL) {
- ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE);
+ if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
+ {
+ BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
+ SystemError * const sysErr = (SystemError*)&signal->theData[0];
+ sysErr->errorCode = SystemError::StartInProgressError;
+ sysErr->errorRef = reference();
+ sysErr->data1= 0;
+ sysErr->data2= __LINE__;
+ sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
+ nodeResetStart();
}//if
/*--------------------------------------------------*/
@@ -5187,15 +5219,16 @@ void Dbdih::removeNodeFromTable(Signal* signal,
/**
* For each of replica record
*/
- Uint32 replicaNo = 0;
+ bool found = false;
ReplicaRecordPtr replicaPtr;
for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
- replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) {
+ replicaPtr.i = replicaPtr.p->nextReplica) {
jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
if(replicaPtr.p->procNode == nodeId){
jam();
+ found = true;
noOfRemovedReplicas++;
removeNodeFromStored(nodeId, fragPtr, replicaPtr);
if(replicaPtr.p->lcpOngoingFlag){
@@ -5211,6 +5244,15 @@ void Dbdih::removeNodeFromTable(Signal* signal,
}
}
}
+ if (!found)
+ {
+ jam();
+ /**
+ * Run updateNodeInfo to remove any dead nodes from list of activeNodes
+ * see bug#15587
+ */
+ updateNodeInfo(fragPtr);
+ }
noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
}
diff --git a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
index 951d1e90251..13ae5aa1bbf 100644
--- a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
+++ b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
@@ -2881,6 +2881,7 @@ private:
UintR ctransidHash[1024];
Uint32 c_diskless;
+ Uint32 c_error_insert_table_id;
public:
/**
diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
index b6178227d31..fc6a470e0ef 100644
--- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
+++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
@@ -3532,6 +3532,7 @@ void Dblqh::execLQHKEYREQ(Signal* signal)
jam();
regTcPtr->activeCreat = ZTRUE;
CRASH_INSERTION(5002);
+ CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id);
} else {
regTcPtr->activeCreat = ZFALSE;
}//if
@@ -5869,12 +5870,21 @@ void Dblqh::execABORT(Signal* signal)
warningReport(signal, 8);
return;
}//if
+
+ TcConnectionrec * const regTcPtr = tcConnectptr.p;
+
+ if (ERROR_INSERTED(5100))
+ {
+ SET_ERROR_INSERT_VALUE(5101);
+ return;
+ }
+ CRASH_INSERTION2(5101, regTcPtr->nextReplica != ZNIL);
+
/* ------------------------------------------------------------------------- */
/*A GUIDING DESIGN PRINCIPLE IN HANDLING THESE ERROR SITUATIONS HAVE BEEN */
/*KEEP IT SIMPLE. THUS WE RATHER INSERT A WAIT AND SET THE ABORT_STATE TO */
/*ACTIVE RATHER THAN WRITE NEW CODE TO HANDLE EVERY SPECIAL SITUATION. */
/* ------------------------------------------------------------------------- */
- TcConnectionrec * const regTcPtr = tcConnectptr.p;
if (regTcPtr->nextReplica != ZNIL) {
/* ------------------------------------------------------------------------- */
// We will immediately send the ABORT message also to the next LQH node in line.
@@ -18402,8 +18412,12 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
return;
}
-
-
+ if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2)
+ {
+ c_error_insert_table_id = dumpState->args[1];
+ SET_ERROR_INSERT_VALUE(5042);
+ }
+
}//Dblqh::execDUMP_STATE_ORD()
void Dblqh::execSET_VAR_REQ(Signal* signal)
diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
index 30a50e7695e..d9d1f01b213 100644
--- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
+++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
@@ -6129,7 +6129,6 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
<< " - place: " << c_apiConTimer_line[apiConnectptr.i]);
switch (apiConnectptr.p->apiConnectstate) {
case CS_STARTED:
- ndbrequire(c_apiConTimer_line[apiConnectptr.i] != 3615);
if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
jam();
/*
@@ -6389,8 +6388,8 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
warningEvent(buf);
ndbout_c(buf);
ndbrequire(false);
+ releaseAbortResources(signal);
}
- releaseAbortResources(signal);
return;
}//if
TloopCount++;
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index 9c25d715d07..a741e6233d9 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -21,6 +21,7 @@
#include <NdbRestarter.hpp>
#include <NdbRestarts.hpp>
#include <Vector.hpp>
+#include <signaldata/DumpStateOrd.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@@ -409,6 +410,132 @@ int runLateCommit(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
+int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
+ int result = NDBT_OK;
+ int loops = ctx->getNumLoops();
+ int records = ctx->getNumRecords();
+ NdbRestarter restarter;
+
+ Uint32 tableId = ctx->getTab()->getTableId();
+ int dump[2] = { DumpStateOrd::LqhErrorInsert5042, 0 };
+ dump[1] = tableId;
+
+ int nodeId = restarter.getDbNodeId(1);
+
+ ndbout << "Restart node " << nodeId << endl;
+
+ if (restarter.restartOneDbNode(nodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.dumpStateOneNode(nodeId, dump, 2))
+ return NDBT_FAILED;
+
+ if (restarter.startNodes(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesStarted(&nodeId, 1))
+ return NDBT_FAILED;
+
+ ctx->stopTest();
+ return NDBT_OK;
+}
+
+int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
+ int result = NDBT_OK;
+ int loops = ctx->getNumLoops();
+ int records = ctx->getNumRecords();
+ NdbRestarter restarter;
+
+ int nodeId = restarter.getDbNodeId(1);
+
+ ndbout << "Restart node " << nodeId << endl;
+
+ if (restarter.restartOneDbNode(nodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.insertErrorInNode(nodeId, 7165))
+ return NDBT_FAILED;
+
+ if (restarter.startNodes(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesStarted(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.restartOneDbNode(nodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.insertErrorInNode(nodeId, 7171))
+ return NDBT_FAILED;
+
+ if (restarter.startNodes(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesStarted(&nodeId, 1))
+ return NDBT_FAILED;
+
+ ctx->stopTest();
+ return NDBT_OK;
+}
+
+int runBug15685(NDBT_Context* ctx, NDBT_Step* step){
+
+ Ndb* pNdb = GETNDB(step);
+ HugoOperations hugoOps(*ctx->getTab());
+ NdbRestarter restarter;
+
+ HugoTransactions hugoTrans(*ctx->getTab());
+ if (hugoTrans.loadTable(GETNDB(step), 10) != 0){
+ return NDBT_FAILED;
+ }
+
+ if(hugoOps.startTransaction(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, 0, 1, rand()) != 0)
+ goto err;
+
+ if(hugoOps.execute_NoCommit(pNdb) != 0)
+ goto err;
+
+ if (restarter.insertErrorInAllNodes(5100))
+ return NDBT_FAILED;
+
+ hugoOps.execute_Rollback(pNdb);
+
+ if (restarter.waitClusterStarted() != 0)
+ goto err;
+
+ if (restarter.insertErrorInAllNodes(0))
+ return NDBT_FAILED;
+
+ ctx->stopTest();
+ return NDBT_OK;
+
+err:
+ ctx->stopTest();
+ return NDBT_FAILED;
+}
+
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -558,6 +685,8 @@ TESTCASE("RestartNFDuringNR",
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
+ STEP(runPkUpdateUntilStopped);
+ STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
@@ -647,6 +776,8 @@ TESTCASE("RestartNodeDuringLCP",
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
+ STEP(runPkUpdateUntilStopped);
+ STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
@@ -671,6 +802,24 @@ TESTCASE("LateCommit",
STEP(runLateCommit);
FINALIZER(runClearTable);
}
+TESTCASE("Bug15587",
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runScanUpdateUntilStopped);
+ STEP(runBug15587);
+ FINALIZER(runClearTable);
+}
+TESTCASE("Bug15632",
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runBug15632);
+ FINALIZER(runClearTable);
+}
+TESTCASE("Bug15685",
+ "Test bug with NF during abort"){
+ STEP(runBug15685);
+ FINALIZER(runClearTable);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index fc04664564f..6378b4a06d3 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -434,6 +434,18 @@ max-time: 500
cmd: testScan
args: -l 100 -n Scan-bug8262 T7
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15587 T1
+
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15632 T1
+
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15685 T1
+
# OLD FLEX
max-time: 500
cmd: flexBench