summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorunknown <jonas@perch.ndb.mysql.com>2007-06-18 07:48:59 +0200
committerunknown <jonas@perch.ndb.mysql.com>2007-06-18 07:48:59 +0200
commit4a0c83220a085034f3b2ac0ad615becd1405e55b (patch)
tree57610c5d7d8847601d213a7cc9e05f1f76215762 /storage
parentb26fa8543f0fcbcb91b4810e5a20e41042577901 (diff)
downloadmariadb-git-4a0c83220a085034f3b2ac0ad615becd1405e55b.tar.gz
ndb - bug#29167
Fix the case where all nodes in a node group die before they have saved their sysfile (wrt gcp), and Qmgr incorrectly thinks that "node group is missing". Per-file notes: storage/ndb/src/kernel/blocks/ERROR_codes.txt: code; storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: fix; storage/ndb/test/ndbapi/testSystemRestart.cpp: test; storage/ndb/test/run-test/daily-basic-tests.txt: test
Diffstat (limited to 'storage')
-rw-r--r--storage/ndb/src/kernel/blocks/ERROR_codes.txt4
-rw-r--r--storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp13
-rw-r--r--storage/ndb/test/ndbapi/testSystemRestart.cpp48
-rw-r--r--storage/ndb/test/run-test/daily-basic-tests.txt4
4 files changed, 68 insertions, 1 deletions
diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt
index 67eb89f850f..17d6c9b0867 100644
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4029
Next DBLQH 5045
Next DBDICT 6007
-Next DBDIH 7183
+Next DBDIH 7184
Next DBTC 8040
Next CMVMI 9000
Next BACKUP 10038
@@ -75,6 +75,8 @@ Delay GCP_SAVEREQ by 10 secs
7180: Crash master during master-take-over in execMASTER_LCPCONF
+7183: Crash when receiving COPY_GCIREQ
+
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index bc14eec1f98..4a103a76323 100644
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -747,6 +747,8 @@ done:
}
ndbrequire(ok);
+ CRASH_INSERTION(7183);
+
/* ----------------------------------------------------------------------- */
/* WE START BY TRYING TO OPEN THE FIRST RESTORABLE GCI FILE. */
/* ----------------------------------------------------------------------- */
@@ -1230,6 +1232,17 @@ void Dbdih::execDIH_RESTARTREQ(Signal* signal)
Uint32 ng = Sysfile::getNodeGroup(i, SYSFILE->nodeGroups);
ndbrequire(ng < MAX_NDB_NODES);
Uint32 gci = node_gcis[i];
+ if (gci < SYSFILE->lastCompletedGCI[i])
+ {
+ jam();
+ /**
+ * Handle the case where *I* know that the node completed a GCI
+ * but the node itself does not (bug#29167),
+ * i.e. the node died before it wrote its own sysfile
+ */
+ gci = SYSFILE->lastCompletedGCI[i];
+ }
+
if (gci > node_group_gcis[ng])
{
jam();
diff --git a/storage/ndb/test/ndbapi/testSystemRestart.cpp b/storage/ndb/test/ndbapi/testSystemRestart.cpp
index bd5cd3dd3c8..8fada42697d 100644
--- a/storage/ndb/test/ndbapi/testSystemRestart.cpp
+++ b/storage/ndb/test/ndbapi/testSystemRestart.cpp
@@ -1219,6 +1219,48 @@ runBug24664(NDBT_Context* ctx, NDBT_Step* step)
return result;
}
+int
+runBug29167(NDBT_Context* ctx, NDBT_Step* step) // Test step for bug#29167; returns `result` (NDBT_OK unless changed, presumably by CHECK on failure — confirm against the CHECK macro)
+{
+ int result = NDBT_OK;
+ NdbRestarter restarter;
+ Ndb* pNdb = GETNDB(step); // NOTE(review): pNdb is not referenced again in this function
+ const Uint32 nodeCount = restarter.getNumDbNodes();
+
+ if (nodeCount < 2) // with fewer than two data nodes the scenario is not run
+ return NDBT_OK; // reported as success, not as a failure
+
+ int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; // filter handed to the log event handle: checkpoint category (15 is presumably the level, 0 the terminator — confirm)
+ NdbLogEventHandle handle =
+ ndb_mgm_create_logevent_handle(restarter.handle, filter); // NOTE(review): handle is never destroyed in this function and its validity is not checked — confirm
+
+ struct ndb_logevent event;
+ int master = restarter.getMasterNodeId();
+ do { // single-pass block, see the while(false) at its end
+ int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand()); // NOTE(review): presumably needs a node group other than the master's; the nodeCount < 2 guard does not ensure one — confirm
+ int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand()); // second victim, chosen relative to node1's node group
+
+ int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+ restarter.dumpStateAllNodes(val2, 2); // send the 2-word dump command to every node; return value is ignored
+ int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 30000 }; // presumably milliseconds between GCPs — confirm
+ restarter.dumpStateAllNodes(dump, 2); // return value is ignored
+
+ while(ndb_logevent_get_next(handle, &event, 0) >= 0 && // consume log events until get_next returns < 0 ...
+ event.type != NDB_LE_GlobalCheckpointCompleted); // ... or a GlobalCheckpointCompleted event is seen (empty loop body)
+
+ CHECK(restarter.insertErrorInAllNodes(932) == 0); // error insert 932 on all nodes (its meaning is not shown in this file)
+
+ CHECK(restarter.insertErrorInNode(node1, 7183) == 0); // 7183: "Crash when receiving COPY_GCIREQ" per ERROR_codes.txt
+ CHECK(restarter.insertErrorInNode(node2, 7183) == 0); // same crash insert on the second chosen node
+
+ CHECK(restarter.waitClusterNoStart() == 0); // wait for the cluster to reach the no-start state
+ restarter.startAll(); // return value is ignored
+ CHECK(restarter.waitClusterStarted() == 0); // the restart must complete for the test to pass
+ } while(false); // not a loop: the body runs once
+
+ return result;
+}
+
NDBT_TESTSUITE(testSystemRestart);
TESTCASE("SR1",
"Basic system restart test. Focus on testing restart from REDO log.\n"
@@ -1399,6 +1441,12 @@ TESTCASE("Bug24664",
STEP(runBug24664);
FINALIZER(runClearTable);
}
+TESTCASE("Bug29167", "") // NOTE(review): the description string (second argument) is empty
+{
+ INITIALIZER(runWaitStarted);
+ STEP(runBug29167); // registers runBug29167 as this test case's step; no FINALIZER is declared
+}
+
NDBT_TESTSUITE_END(testSystemRestart);
int main(int argc, const char** argv){
diff --git a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt
index 6ce2da47670..19d6c63d08a 100644
--- a/storage/ndb/test/run-test/daily-basic-tests.txt
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt
@@ -485,6 +485,10 @@ max-time: 1000
cmd: testNodeRestart
args: -n Bug27003 T1
+max-time: 300
+cmd: testSystemRestart
+args: -n Bug29167 T1
+
max-time: 1000
cmd: testNodeRestart
args: -n Bug27283 T1