diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2007-06-18 07:48:59 +0200 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2007-06-18 07:48:59 +0200 |
commit | 4a0c83220a085034f3b2ac0ad615becd1405e55b (patch) | |
tree | 57610c5d7d8847601d213a7cc9e05f1f76215762 /storage | |
parent | b26fa8543f0fcbcb91b4810e5a20e41042577901 (diff) | |
download | mariadb-git-4a0c83220a085034f3b2ac0ad615becd1405e55b.tar.gz |
ndb - bug#29167
Fix the case where all nodes in a node group die before they have saved their sysfile (wrt gcp)
and Qmgr incorrectly thinks that "node group is missing"
storage/ndb/src/kernel/blocks/ERROR_codes.txt:
code
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
fix
storage/ndb/test/ndbapi/testSystemRestart.cpp:
test
storage/ndb/test/run-test/daily-basic-tests.txt:
test
Diffstat (limited to 'storage')
-rw-r--r-- | storage/ndb/src/kernel/blocks/ERROR_codes.txt | 4 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 13 | ||||
-rw-r--r-- | storage/ndb/test/ndbapi/testSystemRestart.cpp | 48 | ||||
-rw-r--r-- | storage/ndb/test/run-test/daily-basic-tests.txt | 4 |
4 files changed, 68 insertions, 1 deletions
diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt index 67eb89f850f..17d6c9b0867 100644 --- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4029 Next DBLQH 5045 Next DBDICT 6007 -Next DBDIH 7183 +Next DBDIH 7184 Next DBTC 8040 Next CMVMI 9000 Next BACKUP 10038 @@ -75,6 +75,8 @@ Delay GCP_SAVEREQ by 10 secs 7180: Crash master during master-take-over in execMASTER_LCPCONF +7183: Crash when receiving COPY_GCIREQ + ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index bc14eec1f98..4a103a76323 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -747,6 +747,8 @@ done: } ndbrequire(ok); + CRASH_INSERTION(7183); + /* ----------------------------------------------------------------------- */ /* WE START BY TRYING TO OPEN THE FIRST RESTORABLE GCI FILE. 
*/ /* ----------------------------------------------------------------------- */ @@ -1230,6 +1232,17 @@ void Dbdih::execDIH_RESTARTREQ(Signal* signal) Uint32 ng = Sysfile::getNodeGroup(i, SYSFILE->nodeGroups); ndbrequire(ng < MAX_NDB_NODES); Uint32 gci = node_gcis[i]; + if (gci < SYSFILE->lastCompletedGCI[i]) + { + jam(); + /** + * Handle case, where *I* know that node complete GCI + * but node does not...bug#29167 + * i.e node died before it wrote own sysfile + */ + gci = SYSFILE->lastCompletedGCI[i]; + } + if (gci > node_group_gcis[ng]) { jam(); diff --git a/storage/ndb/test/ndbapi/testSystemRestart.cpp b/storage/ndb/test/ndbapi/testSystemRestart.cpp index bd5cd3dd3c8..8fada42697d 100644 --- a/storage/ndb/test/ndbapi/testSystemRestart.cpp +++ b/storage/ndb/test/ndbapi/testSystemRestart.cpp @@ -1219,6 +1219,48 @@ runBug24664(NDBT_Context* ctx, NDBT_Step* step) return result; } +int +runBug29167(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + NdbRestarter restarter; + Ndb* pNdb = GETNDB(step); + const Uint32 nodeCount = restarter.getNumDbNodes(); + + if (nodeCount < 2) + return NDBT_OK; + + int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; + NdbLogEventHandle handle = + ndb_mgm_create_logevent_handle(restarter.handle, filter); + + struct ndb_logevent event; + int master = restarter.getMasterNodeId(); + do { + int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand()); + int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + restarter.dumpStateAllNodes(val2, 2); + int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 30000 }; + restarter.dumpStateAllNodes(dump, 2); + + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_GlobalCheckpointCompleted); + + CHECK(restarter.insertErrorInAllNodes(932) == 0); + + CHECK(restarter.insertErrorInNode(node1, 7183) == 0); + CHECK(restarter.insertErrorInNode(node2, 7183) == 0); + + 
CHECK(restarter.waitClusterNoStart() == 0); + restarter.startAll(); + CHECK(restarter.waitClusterStarted() == 0); + } while(false); + + return result; +} + NDBT_TESTSUITE(testSystemRestart); TESTCASE("SR1", "Basic system restart test. Focus on testing restart from REDO log.\n" @@ -1399,6 +1441,12 @@ TESTCASE("Bug24664", STEP(runBug24664); FINALIZER(runClearTable); } +TESTCASE("Bug29167", "") +{ + INITIALIZER(runWaitStarted); + STEP(runBug29167); +} + NDBT_TESTSUITE_END(testSystemRestart); int main(int argc, const char** argv){ diff --git a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt index 6ce2da47670..19d6c63d08a 100644 --- a/storage/ndb/test/run-test/daily-basic-tests.txt +++ b/storage/ndb/test/run-test/daily-basic-tests.txt @@ -485,6 +485,10 @@ max-time: 1000 cmd: testNodeRestart args: -n Bug27003 T1 +max-time: 300 +cmd: testSystemRestart +args: -n Bug29167 T1 + max-time: 1000 cmd: testNodeRestart args: -n Bug27283 T1 |