diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-10-12 14:04:20 +0200 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-10-12 14:04:20 +0200 |
commit | 092cf80a3c6414d35e5fdafe5b4b4fb0381c5f4d (patch) | |
tree | 67f2b921064b94fd868d4f3ff511fa3aecbb0ed7 /ndb/src | |
parent | 6ef83b2ed47130ffa6c37cb7b64d80762528a67f (diff) | |
parent | d3b8190f26c4ad43183ff4541aacdb36e04eb2b4 (diff) | |
download | mariadb-git-092cf80a3c6414d35e5fdafe5b4b4fb0381c5f4d.tar.gz |
Merge perch.ndb.mysql.com:/home/jonas/src/41-work
into perch.ndb.mysql.com:/home/jonas/src/50-work
ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
Auto merged
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
Auto merged
ndb/src/kernel/blocks/ERROR_codes.txt:
merge
Diffstat (limited to 'ndb/src')
-rw-r--r-- | ndb/src/kernel/blocks/ERROR_codes.txt | 4 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 2 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 200 |
3 files changed, 128 insertions, 78 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index c8c9e82efc2..74d5df2c1e6 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4014 Next DBLQH 5043 Next DBDICT 6007 -Next DBDIH 7177 +Next DBDIH 7178 Next DBTC 8038 Next CMVMI 9000 Next BACKUP 10022 @@ -66,6 +66,8 @@ Delay GCP_SAVEREQ by 10 secs 7030: Delay in GCP_PREPARE until node has completed a node failure 7031: Delay in GCP_PREPARE and die 3s later +7177: Delay copying of sysfileData in execCOPY_GCIREQ + ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index 5c2cfac5eb1..53e54e96d2b 100644 --- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -1048,6 +1048,8 @@ private: void removeStoredReplica(FragmentstorePtr regFragptr, ReplicaRecordPtr replicaPtr); void searchStoredReplicas(FragmentstorePtr regFragptr); + bool setup_create_replica(FragmentstorePtr, CreateReplicaRecord*, + ConstPtr<ReplicaRecord>); void updateNodeInfo(FragmentstorePtr regFragptr); //------------------------------------ diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 14464001522..294573c90c9 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -627,22 +627,48 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal) ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE); ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart); ndbrequire(reason != CopyGCIReq::IDLE); - + bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32; + + if (ERROR_INSERTED(7177)) + { + jam(); + + if (signal->getLength() == 3) + { + jam(); + goto done; + } + } + arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4); for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++) cdata[tstart+i] = copyGCI->data[i]; - if ((tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32) { + if (ERROR_INSERTED(7177) && isMaster() && isdone) + { + sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3); + return; + } + +done: + if (isdone) + { jam(); c_copyGCISlave.m_expectedNextWord = 0; - } else { + } + else + { jam(); c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE; return; - }//if - - memcpy(sysfileData, cdata, sizeof(sysfileData)); + } + if (cmasterdihref != reference()) + { + jam(); + memcpy(sysfileData, cdata, sizeof(sysfileData)); + } + c_copyGCISlave.m_copyReason = reason; c_copyGCISlave.m_senderRef = signal->senderBlockRef(); c_copyGCISlave.m_senderData = copyGCI->anyData; @@ -8441,14 +8467,30 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){ resetReplicaLcp(replicaPtr.p, newestRestorableGCI); - /* ----------------------------------------------------------------- - * LINK THE REPLICA INTO THE STORED REPLICA LIST. WE WILL USE THIS - * NODE AS A STORED REPLICA. - * WE MUST FIRST LINK IT OUT OF THE LIST OF OLD STORED REPLICAS. - * --------------------------------------------------------------- */ - removeOldStoredReplica(fragPtr, replicaPtr); - linkStoredReplica(fragPtr, replicaPtr); - + /** + * Make sure we can also find REDO for restoring replica... + */ + { + CreateReplicaRecord createReplica; + ConstPtr<ReplicaRecord> constReplicaPtr; + constReplicaPtr.i = replicaPtr.i; + constReplicaPtr.p = replicaPtr.p; + if (setup_create_replica(fragPtr, + &createReplica, constReplicaPtr)) + { + removeOldStoredReplica(fragPtr, replicaPtr); + linkStoredReplica(fragPtr, replicaPtr); + } + else + { + infoEvent("Forcing take-over of node %d due to unsufficient REDO" + " for table %d fragment: %d", + nodePtr.i, tabPtr.i, i); + + setNodeActiveStatus(nodePtr.i, + Sysfile::NS_NotActive_NotTakenOver); + } + } } default: jam(); @@ -9492,6 +9534,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId) FragmentstorePtr fragPtr; getFragstore(tabPtr.p, fragId, fragPtr); checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas); + checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas); fragId++; if (fragId >= tabPtr.p->totalfragments) { jam(); @@ -12487,16 +12530,75 @@ void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr) /* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/ /* A MAXIMUM OF FOUR NODES IS RETRIEVED. */ /*************************************************************************/ +bool +Dbdih::setup_create_replica(FragmentstorePtr fragPtr, + CreateReplicaRecord* createReplicaPtrP, + ConstPtr<ReplicaRecord> replicaPtr) +{ + createReplicaPtrP->dataNodeId = replicaPtr.p->procNode; + createReplicaPtrP->replicaRec = replicaPtr.i; + + /* ----------------------------------------------------------------- */ + /* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */ + /* SYSTEM RESTART. */ + /* ----------------------------------------------------------------- */ + Uint32 startGci; + Uint32 startLcpNo; + Uint32 stopGci = SYSFILE->newestRestorableGCI; + bool result = findStartGci(replicaPtr, + stopGci, + startGci, + startLcpNo); + if (!result) + { + jam(); + /* --------------------------------------------------------------- */ + /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/ + /* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */ + /* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/ + /* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */ + /* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */ + /* */ + /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */ + /* LOCAL CHECKPOINT TO ZNIL. */ + /* --------------------------------------------------------------- */ + createReplicaPtrP->lcpNo = ZNIL; + } + else + { + jam(); + /* --------------------------------------------------------------- */ + /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */ + /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */ + /* --------------------------------------------------------------- */ + createReplicaPtrP->lcpNo = startLcpNo; + arrGuard(startLcpNo, MAX_LCP_STORED); + createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo]; + }//if + + + /* ----------------------------------------------------------------- */ + /* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */ + /* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */ + /* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */ + /* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */ + /* -_--------------------------------------------------------------- */ + return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci); +} + void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) { Uint32 nextReplicaPtrI; - ConstPtr<ReplicaRecord> replicaPtr; + Ptr<ReplicaRecord> replicaPtr; replicaPtr.i = fragPtr.p->storedReplicas; while (replicaPtr.i != RNIL) { jam(); ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); nextReplicaPtrI = replicaPtr.p->nextReplica; + ConstPtr<ReplicaRecord> constReplicaPtr; + constReplicaPtr.i = replicaPtr.i; + constReplicaPtr.p = replicaPtr.p; NodeRecordPtr nodePtr; nodePtr.i = replicaPtr.p->procNode; ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord); @@ -12516,69 +12618,13 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) createReplicaPtr.i = cnoOfCreateReplicas; ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord); cnoOfCreateReplicas++; - createReplicaPtr.p->dataNodeId = replicaPtr.p->procNode; - createReplicaPtr.p->replicaRec = replicaPtr.i; - /* ----------------------------------------------------------------- */ - /* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */ - /* SYSTEM RESTART. */ - /* ----------------------------------------------------------------- */ - Uint32 startGci; - Uint32 startLcpNo; - Uint32 stopGci = SYSFILE->newestRestorableGCI; - bool result = findStartGci(replicaPtr, - stopGci, - startGci, - startLcpNo); - if (!result) { - jam(); - /* --------------------------------------------------------------- */ - /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/ - /* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */ - /* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/ - /* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */ - /* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */ - /* */ - /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */ - /* LOCAL CHECKPOINT TO ZNIL. */ - /* --------------------------------------------------------------- */ - createReplicaPtr.p->lcpNo = ZNIL; - } else { - jam(); - /* --------------------------------------------------------------- */ - /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */ - /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */ - /* --------------------------------------------------------------- */ - createReplicaPtr.p->lcpNo = startLcpNo; - arrGuard(startLcpNo, MAX_LCP_STORED); - createReplicaPtr.p->createLcpId = replicaPtr.p->lcpId[startLcpNo]; - }//if - - if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){ - jam(); - nodePtr.p->nodeStatus = NodeRecord::DEAD; - } - - /* ----------------------------------------------------------------- */ - /* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */ - /* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */ - /* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */ - /* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */ - /* -_--------------------------------------------------------------- */ - if (!findLogNodes(createReplicaPtr.p, fragPtr, startGci, stopGci)) { - jam(); - /* --------------------------------------------------------------- */ - /* WE WERE NOT ABLE TO FIND ANY WAY OF RESTORING THIS REPLICA. */ - /* THIS IS A POTENTIAL SYSTEM ERROR. */ - /* --------------------------------------------------------------- */ - cnoOfCreateReplicas--; - return; - }//if - - if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){ - jam(); - nodePtr.p->nodeStatus = NodeRecord::ALIVE; - } + /** + * Should have been checked in resetReplicaSr + */ + ndbrequire(setup_create_replica(fragPtr, + createReplicaPtr.p, + constReplicaPtr)); break; } default: |