summaryrefslogtreecommitdiff
path: root/ndb/src
diff options
context:
space:
mode:
authorunknown <jonas@perch.ndb.mysql.com>2006-10-12 14:04:20 +0200
committerunknown <jonas@perch.ndb.mysql.com>2006-10-12 14:04:20 +0200
commit092cf80a3c6414d35e5fdafe5b4b4fb0381c5f4d (patch)
tree67f2b921064b94fd868d4f3ff511fa3aecbb0ed7 /ndb/src
parent6ef83b2ed47130ffa6c37cb7b64d80762528a67f (diff)
parentd3b8190f26c4ad43183ff4541aacdb36e04eb2b4 (diff)
downloadmariadb-git-092cf80a3c6414d35e5fdafe5b4b4fb0381c5f4d.tar.gz
Merge perch.ndb.mysql.com:/home/jonas/src/41-work
into perch.ndb.mysql.com:/home/jonas/src/50-work ndb/src/kernel/blocks/dbdih/Dbdih.hpp: Auto merged ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Auto merged ndb/src/kernel/blocks/ERROR_codes.txt: merge
Diffstat (limited to 'ndb/src')
-rw-r--r--ndb/src/kernel/blocks/ERROR_codes.txt4
-rw-r--r--ndb/src/kernel/blocks/dbdih/Dbdih.hpp2
-rw-r--r--ndb/src/kernel/blocks/dbdih/DbdihMain.cpp200
3 files changed, 128 insertions, 78 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index c8c9e82efc2..74d5df2c1e6 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4014
Next DBLQH 5043
Next DBDICT 6007
-Next DBDIH 7177
+Next DBDIH 7178
Next DBTC 8038
Next CMVMI 9000
Next BACKUP 10022
@@ -66,6 +66,8 @@ Delay GCP_SAVEREQ by 10 secs
7030: Delay in GCP_PREPARE until node has completed a node failure
7031: Delay in GCP_PREPARE and die 3s later
+7177: Delay copying of sysfileData in execCOPY_GCIREQ
+
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
index 5c2cfac5eb1..53e54e96d2b 100644
--- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
+++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
@@ -1048,6 +1048,8 @@ private:
void removeStoredReplica(FragmentstorePtr regFragptr,
ReplicaRecordPtr replicaPtr);
void searchStoredReplicas(FragmentstorePtr regFragptr);
+ bool setup_create_replica(FragmentstorePtr, CreateReplicaRecord*,
+ ConstPtr<ReplicaRecord>);
void updateNodeInfo(FragmentstorePtr regFragptr);
//------------------------------------
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index 14464001522..294573c90c9 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -627,22 +627,48 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
ndbrequire(reason != CopyGCIReq::IDLE);
-
+ bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32;
+
+ if (ERROR_INSERTED(7177))
+ {
+ jam();
+
+ if (signal->getLength() == 3)
+ {
+ jam();
+ goto done;
+ }
+ }
+
arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
cdata[tstart+i] = copyGCI->data[i];
- if ((tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32) {
+ if (ERROR_INSERTED(7177) && isMaster() && isdone)
+ {
+ sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3);
+ return;
+ }
+
+done:
+ if (isdone)
+ {
jam();
c_copyGCISlave.m_expectedNextWord = 0;
- } else {
+ }
+ else
+ {
jam();
c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
return;
- }//if
-
- memcpy(sysfileData, cdata, sizeof(sysfileData));
+ }
+ if (cmasterdihref != reference())
+ {
+ jam();
+ memcpy(sysfileData, cdata, sizeof(sysfileData));
+ }
+
c_copyGCISlave.m_copyReason = reason;
c_copyGCISlave.m_senderRef = signal->senderBlockRef();
c_copyGCISlave.m_senderData = copyGCI->anyData;
@@ -8441,14 +8467,30 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){
resetReplicaLcp(replicaPtr.p, newestRestorableGCI);
- /* -----------------------------------------------------------------
- * LINK THE REPLICA INTO THE STORED REPLICA LIST. WE WILL USE THIS
- * NODE AS A STORED REPLICA.
- * WE MUST FIRST LINK IT OUT OF THE LIST OF OLD STORED REPLICAS.
- * --------------------------------------------------------------- */
- removeOldStoredReplica(fragPtr, replicaPtr);
- linkStoredReplica(fragPtr, replicaPtr);
-
+ /**
+ * Make sure we can also find REDO for restoring replica...
+ */
+ {
+ CreateReplicaRecord createReplica;
+ ConstPtr<ReplicaRecord> constReplicaPtr;
+ constReplicaPtr.i = replicaPtr.i;
+ constReplicaPtr.p = replicaPtr.p;
+ if (setup_create_replica(fragPtr,
+ &createReplica, constReplicaPtr))
+ {
+ removeOldStoredReplica(fragPtr, replicaPtr);
+ linkStoredReplica(fragPtr, replicaPtr);
+ }
+ else
+ {
+ infoEvent("Forcing take-over of node %d due to unsufficient REDO"
+ " for table %d fragment: %d",
+ nodePtr.i, tabPtr.i, i);
+
+ setNodeActiveStatus(nodePtr.i,
+ Sysfile::NS_NotActive_NotTakenOver);
+ }
+ }
}
default:
jam();
@@ -9492,6 +9534,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
FragmentstorePtr fragPtr;
getFragstore(tabPtr.p, fragId, fragPtr);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
+ checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas);
fragId++;
if (fragId >= tabPtr.p->totalfragments) {
jam();
@@ -12487,16 +12530,75 @@ void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr)
/* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/
/* A MAXIMUM OF FOUR NODES IS RETRIEVED. */
/*************************************************************************/
+bool
+Dbdih::setup_create_replica(FragmentstorePtr fragPtr,
+ CreateReplicaRecord* createReplicaPtrP,
+ ConstPtr<ReplicaRecord> replicaPtr)
+{
+ createReplicaPtrP->dataNodeId = replicaPtr.p->procNode;
+ createReplicaPtrP->replicaRec = replicaPtr.i;
+
+ /* ----------------------------------------------------------------- */
+ /* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
+ /* SYSTEM RESTART. */
+ /* ----------------------------------------------------------------- */
+ Uint32 startGci;
+ Uint32 startLcpNo;
+ Uint32 stopGci = SYSFILE->newestRestorableGCI;
+ bool result = findStartGci(replicaPtr,
+ stopGci,
+ startGci,
+ startLcpNo);
+ if (!result)
+ {
+ jam();
+ /* --------------------------------------------------------------- */
+ /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
+ /* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */
+ /* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/
+ /* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */
+ /* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */
+ /* */
+ /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
+ /* LOCAL CHECKPOINT TO ZNIL. */
+ /* --------------------------------------------------------------- */
+ createReplicaPtrP->lcpNo = ZNIL;
+ }
+ else
+ {
+ jam();
+ /* --------------------------------------------------------------- */
+ /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
+ /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
+ /* --------------------------------------------------------------- */
+ createReplicaPtrP->lcpNo = startLcpNo;
+ arrGuard(startLcpNo, MAX_LCP_STORED);
+ createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo];
+ }//if
+
+
+ /* ----------------------------------------------------------------- */
+ /* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
+ /* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */
+ /* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
+ /* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
+ /* -_--------------------------------------------------------------- */
+ return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci);
+}
+
void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
{
Uint32 nextReplicaPtrI;
- ConstPtr<ReplicaRecord> replicaPtr;
+ Ptr<ReplicaRecord> replicaPtr;
replicaPtr.i = fragPtr.p->storedReplicas;
while (replicaPtr.i != RNIL) {
jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
nextReplicaPtrI = replicaPtr.p->nextReplica;
+ ConstPtr<ReplicaRecord> constReplicaPtr;
+ constReplicaPtr.i = replicaPtr.i;
+ constReplicaPtr.p = replicaPtr.p;
NodeRecordPtr nodePtr;
nodePtr.i = replicaPtr.p->procNode;
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
@@ -12516,69 +12618,13 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
createReplicaPtr.i = cnoOfCreateReplicas;
ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
cnoOfCreateReplicas++;
- createReplicaPtr.p->dataNodeId = replicaPtr.p->procNode;
- createReplicaPtr.p->replicaRec = replicaPtr.i;
- /* ----------------------------------------------------------------- */
- /* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
- /* SYSTEM RESTART. */
- /* ----------------------------------------------------------------- */
- Uint32 startGci;
- Uint32 startLcpNo;
- Uint32 stopGci = SYSFILE->newestRestorableGCI;
- bool result = findStartGci(replicaPtr,
- stopGci,
- startGci,
- startLcpNo);
- if (!result) {
- jam();
- /* --------------------------------------------------------------- */
- /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
- /* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */
- /* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/
- /* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */
- /* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */
- /* */
- /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
- /* LOCAL CHECKPOINT TO ZNIL. */
- /* --------------------------------------------------------------- */
- createReplicaPtr.p->lcpNo = ZNIL;
- } else {
- jam();
- /* --------------------------------------------------------------- */
- /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
- /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
- /* --------------------------------------------------------------- */
- createReplicaPtr.p->lcpNo = startLcpNo;
- arrGuard(startLcpNo, MAX_LCP_STORED);
- createReplicaPtr.p->createLcpId = replicaPtr.p->lcpId[startLcpNo];
- }//if
-
- if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){
- jam();
- nodePtr.p->nodeStatus = NodeRecord::DEAD;
- }
-
- /* ----------------------------------------------------------------- */
- /* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
- /* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */
- /* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
- /* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
- /* -_--------------------------------------------------------------- */
- if (!findLogNodes(createReplicaPtr.p, fragPtr, startGci, stopGci)) {
- jam();
- /* --------------------------------------------------------------- */
- /* WE WERE NOT ABLE TO FIND ANY WAY OF RESTORING THIS REPLICA. */
- /* THIS IS A POTENTIAL SYSTEM ERROR. */
- /* --------------------------------------------------------------- */
- cnoOfCreateReplicas--;
- return;
- }//if
-
- if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){
- jam();
- nodePtr.p->nodeStatus = NodeRecord::ALIVE;
- }
+ /**
+ * Should have been checked in resetReplicaSr
+ */
+ ndbrequire(setup_create_replica(fragPtr,
+ createReplicaPtr.p,
+ constReplicaPtr));
break;
}
default: