diff options
author | unknown <tomas@poseidon.mysql.com> | 2007-02-14 11:55:31 +0700 |
---|---|---|
committer | unknown <tomas@poseidon.mysql.com> | 2007-02-14 11:55:31 +0700 |
commit | af4c24bbc7836e26ffe012104b972b41f6ed4e1f (patch) | |
tree | 1017dbcabeb31f553739726ae26c39a955789e3f /storage | |
parent | 414f5868d5ede014dccdeb8e831d9fc4d40c1ac0 (diff) | |
parent | 9467918098a8cf798f3ececc50462f60e9e7579c (diff) | |
download | mariadb-git-af4c24bbc7836e26ffe012104b972b41f6ed4e1f.tar.gz |
Merge poseidon.mysql.com:/home/tomas/mysql-5.0-ndb
into poseidon.mysql.com:/home/tomas/mysql-5.1-new-ndb
storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp:
Auto merged
storage/ndb/src/kernel/vm/WatchDog.cpp:
Auto merged
storage/ndb/src/mgmsrv/MgmtSrvr.cpp:
Auto merged
storage/ndb/src/ndbapi/ClusterMgr.cpp:
Auto merged
storage/ndb/src/ndbapi/ClusterMgr.hpp:
Auto merged
storage/ndb/src/ndbapi/SignalSender.hpp:
Auto merged
storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp:
manual merge
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
manual merge
storage/ndb/src/ndbapi/SignalSender.cpp:
manual merge
Diffstat (limited to 'storage')
-rw-r--r-- | storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp | 5 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp | 20 | ||||
-rw-r--r-- | storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 130 | ||||
-rw-r--r-- | storage/ndb/src/kernel/vm/WatchDog.cpp | 7 | ||||
-rw-r--r-- | storage/ndb/src/mgmsrv/MgmtSrvr.cpp | 94 | ||||
-rw-r--r-- | storage/ndb/src/ndbapi/ClusterMgr.cpp | 5 | ||||
-rw-r--r-- | storage/ndb/src/ndbapi/ClusterMgr.hpp | 1 | ||||
-rw-r--r-- | storage/ndb/src/ndbapi/SignalSender.hpp | 2 |
8 files changed, 174 insertions, 90 deletions
diff --git a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp index 27bb9af03c0..46c5ef3751b 100644 --- a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp +++ b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp @@ -107,7 +107,10 @@ public: CmvmiDumpLongSignalMemory = 2601, CmvmiSetRestartOnErrorInsert = 2602, CmvmiTestLongSigWithDelay = 2603, - + CmvmiDumpSubscriptions = 2604, /* note: done to respective outfile + to be able to debug if events + for some reason does not end up + in clusterlog */ LCPContinue = 5900, // 7000 DIH // 7001 DIH diff --git a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index ddf0dc95098..680e03218a9 100644 --- a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -921,7 +921,7 @@ void Cmvmi::execSET_VAR_REQ(Signal* signal) case TimeToWaitAlive: // QMGR - case HeartbeatIntervalDbDb: // TODO ev till Ndbcnt också + case HeartbeatIntervalDbDb: // TODO possibly Ndbcnt too case HeartbeatIntervalDbApi: case ArbitTimeout: sendSignal(QMGR_REF, GSN_SET_VAR_REQ, signal, 3, JBB); @@ -1129,6 +1129,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } + if (arg == DumpStateOrd::CmvmiDumpSubscriptions) + { + SubscriberPtr ptr; + subscribers.first(ptr); + g_eventLogger.info("List subscriptions:"); + while(ptr.i != RNIL) + { + g_eventLogger.info("Subscription: %u, nodeId: %u, ref: 0x%x", + ptr.i, refToNode(ptr.p->blockRef), ptr.p->blockRef); + for(Uint32 i = 0; i < LogLevel::LOGLEVEL_CATEGORIES; i++) + { + Uint32 level = ptr.p->logLevel.getLogLevel((LogLevel::EventCategory)i); + g_eventLogger.info("Category %u Level %u", i, level); + } + subscribers.next(ptr); + } + } + if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", g_sectionSegmentPool.getSize(), diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 9cd7dbfc59b..19ba8d83ecc 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -1883,8 +1883,8 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) return; }//if if (getNodeStatus(nodeId) != NodeRecord::DEAD){ - ndbout << "nodeStatus in START_PERMREQ = " - << (Uint32) getNodeStatus(nodeId) << endl; + g_eventLogger.error("nodeStatus in START_PERMREQ = %u", + (Uint32) getNodeStatus(nodeId)); ndbrequire(false); }//if @@ -4297,9 +4297,9 @@ void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr) jam(); break; default: - ndbout_c("outstanding gsn: %s(%d)", - getSignalName(c_nodeStartMaster.m_outstandingGsn), - c_nodeStartMaster.m_outstandingGsn); + g_eventLogger.error("outstanding gsn: %s(%d)", + getSignalName(c_nodeStartMaster.m_outstandingGsn), + c_nodeStartMaster.m_outstandingGsn); ndbrequire(false); } @@ -4752,9 +4752,10 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; break; default: - ndbout << "activeStatus = " << (Uint32) failedNodePtr.p->activeStatus; - ndbout << " at failure after NODE_FAILREP of node = "; - ndbout << failedNodePtr.i << endl; + g_eventLogger.error("activeStatus = %u " + "at failure after NODE_FAILREP of node = %u", + (Uint32) failedNodePtr.p->activeStatus, + failedNodePtr.i); ndbrequire(false); break; }//switch @@ -4909,7 +4910,7 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ /** * Node failure during master take over... */ - ndbout_c("Nodefail during master take over"); + g_eventLogger.info("Nodefail during master take over"); } setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER); @@ -5149,7 +5150,8 @@ void Dbdih::execMASTER_GCPCONF(Signal* signal) if (latestLcpId > SYSFILE->latestLCP_ID) { jam(); #if 0 - ndbout_c("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId); + g_eventLogger.info("Dbdih: Setting SYSFILE->latestLCP_ID to %d", + latestLcpId); SYSFILE->latestLCP_ID = latestLcpId; #endif SYSFILE->keepGCI = oldestKeepGci; @@ -5808,7 +5810,7 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId, if (ERROR_INSERTED(7030)) { - ndbout_c("Reenable GCP_PREPARE"); + g_eventLogger.info("Reenable GCP_PREPARE"); CLEAR_ERROR_INSERT_VALUE; } @@ -5981,7 +5983,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); #if 0 if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){ - ndbout_c("Dbdih: Also resetting c_copyGCISlave"); + g_eventLogger.info("Dbdih: Also resetting c_copyGCISlave"); c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE; c_copyGCISlave.m_expectedNextWord = 0; } @@ -6066,7 +6068,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ if(c_lcpState.lcpStatus == LCP_TAB_SAVED){ #ifdef VM_TRACE - ndbout_c("Sending extra GSN_LCP_COMPLETE_REP to new master"); + g_eventLogger.info("Sending extra GSN_LCP_COMPLETE_REP to new master"); #endif sendLCP_COMPLETE_REP(signal); } @@ -6222,7 +6224,7 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) nodePtr.p->lcpStateAtTakeOver = lcpState; #ifdef VM_TRACE - ndbout_c("MASTER_LCPCONF"); + g_eventLogger.info("MASTER_LCPCONF"); printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0); #endif @@ -6299,7 +6301,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) // protocol. /* --------------------------------------------------------------------- */ #ifdef VM_TRACE - ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"); + g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"); #endif checkLcpStart(signal, __LINE__); break; @@ -6310,7 +6312,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) // protocol by calculating the keep gci and storing the new lcp id. /* --------------------------------------------------------------------- */ #ifdef VM_TRACE - ndbout_c("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"); + g_eventLogger.info("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"); #endif if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) { jam(); @@ -6321,7 +6323,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) /*---------------------------------------------------------------------*/ Uint32 lcpId = SYSFILE->latestLCP_ID; #ifdef VM_TRACE - ndbout_c("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1); + g_eventLogger.info("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1); #endif SYSFILE->latestLCP_ID--; }//if @@ -6338,10 +6340,10 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) * complete before finalising the LCP process. * ------------------------------------------------------------------ */ #ifdef VM_TRACE - ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> " - "startLcpRoundLoopLab(table=%u, fragment=%u)", - c_lcpMasterTakeOverState.minTableId, - c_lcpMasterTakeOverState.minFragId); + g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> " + "startLcpRoundLoopLab(table=%u, fragment=%u)", + c_lcpMasterTakeOverState.minTableId, + c_lcpMasterTakeOverState.minFragId); #endif c_lcpState.keepGci = SYSFILE->keepGCI; @@ -7745,8 +7747,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) if (cgcpSameCounter == 1200) { jam(); #ifdef VM_TRACE - ndbout << "System crash due to GCP Stop in state = "; - ndbout << (Uint32) cgcpStatus << endl; + g_eventLogger.error("System crash due to GCP Stop in state = %u", + (Uint32) cgcpStatus); #endif crashSystemAtGcpStop(signal); return; @@ -7759,8 +7761,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) if (cgcpSameCounter == 1200) { jam(); #ifdef VM_TRACE - ndbout << "System crash due to GCP Stop in state = "; - ndbout << (Uint32) cgcpStatus << endl; + g_eventLogger.error("System crash due to GCP Stop in state = %u", + (Uint32) cgcpStatus); #endif crashSystemAtGcpStop(signal); return; @@ -7951,7 +7953,7 @@ void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId) getNodeState().startLevel == NodeState::SL_STARTED){ jam(); #if 0 - ndbout_c("Dbdih: Clearing initial start ongoing"); + g_eventLogger.info("Dbdih: Clearing initial start ongoing"); #endif Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits); } @@ -7970,7 +7972,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) if (ERROR_INSERTED(7030)) { cgckptflag = true; - ndbout_c("Delayed GCP_PREPARE 5s"); + g_eventLogger.info("Delayed GCP_PREPARE 5s"); sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000, signal->getLength()); return; @@ -7990,7 +7992,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) if (ERROR_INSERTED(7031)) { - ndbout_c("Crashing delayed in GCP_PREPARE 3s"); + g_eventLogger.info("Crashing delayed in GCP_PREPARE 3s"); signal->theData[0] = 9999; sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1); return; @@ -8514,7 +8516,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) * This is LCP master takeover */ #ifdef VM_TRACE - ndbout_c("initLcpLab aborted due to LCP master takeover - 1"); + g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 1"); #endif c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); sendMASTER_LCPCONF(signal); @@ -8527,7 +8529,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) * Master take over but has not yet received MASTER_LCPREQ */ #ifdef VM_TRACE - ndbout_c("initLcpLab aborted due to LCP master takeover - 2"); + g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 2"); #endif return; } @@ -9836,9 +9838,10 @@ void Dbdih::checkTcCounterLab(Signal* signal) { CRASH_INSERTION(7009); if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) { - ndbout << "lcpStatus = " << (Uint32) c_lcpState.lcpStatus; - ndbout << "lcpStatusUpdatedPlace = " << - c_lcpState.lcpStatusUpdatedPlace << endl; + g_eventLogger.error("lcpStatus = %u" + "lcpStatusUpdatedPlace = %d", + (Uint32) c_lcpState.lcpStatus, + c_lcpState.lcpStatusUpdatedPlace); ndbrequire(false); return; }//if @@ -10421,9 +10424,8 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal) if(tabPtr.p->tabStatus == TabRecord::TS_DROPPING){ jam(); - ndbout_c("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ", - tableId, - fragId); + g_eventLogger.info("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ", + tableId, fragId); } else { jam(); /** @@ -10553,7 +10555,7 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, }; #ifdef VM_TRACE - ndbout_c("Fragment Replica(node=%d) not found", nodeId); + g_eventLogger.info("Fragment Replica(node=%d) not found", nodeId); replicaPtr.i = fragPtrP->oldStoredReplicas; while(replicaPtr.i != RNIL){ ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); @@ -10566,9 +10568,9 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, }//if }; if(replicaPtr.i != RNIL){ - ndbout_c("...But was found in oldStoredReplicas"); + g_eventLogger.info("...But was found in oldStoredReplicas"); } else { - ndbout_c("...And wasn't found in oldStoredReplicas"); + g_eventLogger.info("...And wasn't found in oldStoredReplicas"); } #endif ndbrequire(false); @@ -10635,8 +10637,8 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) if(lcpNo != replicaPtr.p->nextLcp){ if (handle_invalid_lcp_no(lcpReport, replicaPtr)) { - ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d", - lcpNo, replicaPtr.p->nextLcp); + g_eventLogger.error("lcpNo = %d replicaPtr.p->nextLcp = %d", + lcpNo, replicaPtr.p->nextLcp); ndbrequire(false); } } @@ -10672,7 +10674,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) // Not all fragments in table have been checkpointed. /* ----------------------------------------------------------------- */ if(0) - ndbout_c("reportLcpCompletion: fragment %d not ready", fid); + g_eventLogger.info("reportLcpCompletion: fragment %d not ready", fid); return false; }//if }//for @@ -10789,7 +10791,7 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal) jamEntry(); #if 0 - ndbout_c("LCP_COMPLETE_REP"); + g_eventLogger.info("LCP_COMPLETE_REP"); printLCP_COMPLETE_REP(stdout, signal->getDataPtr(), signal->length(), number()); @@ -10875,7 +10877,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal) if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){ jam(); #ifdef VM_TRACE - ndbout_c("Exiting from allNodesLcpCompletedLab"); + g_eventLogger.info("Exiting from allNodesLcpCompletedLab"); #endif return; } @@ -11112,14 +11114,14 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) infoEvent("Detected GCP stop...sending kill to %s", c_GCP_SAVEREQ_Counter.getText()); - ndbout_c("Detected GCP stop...sending kill to %s", - c_GCP_SAVEREQ_Counter.getText()); + g_eventLogger.error("Detected GCP stop...sending kill to %s", + c_GCP_SAVEREQ_Counter.getText()); return; } case GCP_SAVE_LQH_FINISHED: - ndbout_c("m_copyReason: %d m_waiting: %d", - c_copyGCIMaster.m_copyReason, - c_copyGCIMaster.m_waiting); + g_eventLogger.error("m_copyReason: %d m_waiting: %d", + c_copyGCIMaster.m_copyReason, + c_copyGCIMaster.m_waiting); break; case GCP_READY: // shut up lint case GCP_PREPARE_SENT: @@ -11127,11 +11129,11 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) break; } - ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", - c_copyGCISlave.m_senderData, - c_copyGCISlave.m_senderRef, - c_copyGCISlave.m_copyReason, - c_copyGCISlave.m_expectedNextWord); + g_eventLogger.error("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", + c_copyGCISlave.m_senderData, + c_copyGCISlave.m_senderRef, + c_copyGCISlave.m_copyReason, + c_copyGCISlave.m_expectedNextWord); FileRecordPtr file0Ptr; file0Ptr.i = crestartInfoFile[0]; @@ -13350,9 +13352,9 @@ void Dbdih::setLcpActiveStatusEnd() nodePtr.i = getOwnNodeId(); ptrAss(nodePtr, nodeRecord); ndbrequire(nodePtr.p->activeStatus == Sysfile::NS_Active); - ndbout_c("NR: setLcpActiveStatusEnd - m_participatingLQH"); + g_eventLogger.info("NR: setLcpActiveStatusEnd - m_participatingLQH"); } else { - ndbout_c("NR: setLcpActiveStatusEnd - !m_participatingLQH"); + g_eventLogger.info("NR: setLcpActiveStatusEnd - !m_participatingLQH"); } } @@ -14184,8 +14186,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) } if(arg == DumpStateOrd::EnableUndoDelayDataWrite){ - ndbout << "Dbdih:: delay write of datapages for table = " - << dumpState->args[1]<< endl; + g_eventLogger.info("Dbdih:: delay write of datapages for table = %s", + dumpState->args[1]); // Send this dump to ACC and TUP EXECUTE_DIRECT(DBACC, GSN_DUMP_STATE_ORD, signal, 2); EXECUTE_DIRECT(DBTUP, GSN_DUMP_STATE_ORD, signal, 2); @@ -14202,13 +14204,13 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) }//if if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) { // Set time between LCP to min value - ndbout << "Set time between LCP to min value" << endl; + g_eventLogger.info("Set time between LCP to min value"); c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min return; } if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) { // Set time between LCP to max value - ndbout << "Set time between LCP to max value" << endl; + g_eventLogger.info("Set time between LCP to max value"); c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max return; } @@ -14244,7 +14246,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) { cgcpDelay = signal->theData[1]; } - ndbout_c("Setting time between gcp : %d", cgcpDelay); + g_eventLogger.info("Setting time between gcp : %d", cgcpDelay); } if (arg == 7021 && signal->getLength() == 2) @@ -14367,7 +14369,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ while(index < count){ if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){ jam(); - // ndbout_c("Unqueuing %d", index); + // g_eventLogger.info("Unqueuing %d", index); count--; for(Uint32 i = index; i<count; i++){ @@ -14407,7 +14409,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ if(checkLcpAllTablesDoneInLqh()){ jam(); - ndbout_c("This is the last table"); + g_eventLogger.info("This is the last table"); /** * Then check if saving of tab info is done for all tables @@ -14416,7 +14418,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ checkLcpCompletedLab(signal); if(a != c_lcpState.lcpStatus){ - ndbout_c("And all tables are written to already written disk"); + g_eventLogger.info("And all tables are written to already written disk"); } } break; diff --git a/storage/ndb/src/kernel/vm/WatchDog.cpp b/storage/ndb/src/kernel/vm/WatchDog.cpp index d8311ec5d35..d1abb709b1e 100644 --- a/storage/ndb/src/kernel/vm/WatchDog.cpp +++ b/storage/ndb/src/kernel/vm/WatchDog.cpp @@ -22,7 +22,10 @@ #include <NdbOut.hpp> #include <NdbSleep.h> #include <ErrorHandlingMacros.hpp> - +#include <EventLogger.hpp> + +extern EventLogger g_eventLogger; + extern "C" void* runWatchDog(void* w){ @@ -125,7 +128,7 @@ WatchDog::run(){ last_stuck_action = "Unknown place"; break; }//switch - ndbout << "Ndb kernel is stuck in: " << last_stuck_action << endl; + g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action); if(alerts == 3){ shutdownSystem(last_stuck_action); } diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp index 5560259a957..38223502175 100644 --- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -701,7 +701,7 @@ int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond) return WRONG_PROCESS_TYPE; // Check if we have contact with it if(unCond){ - if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected) + if(theFacade->theClusterMgr->getNodeInfo(nodeId).m_api_reg_conf) return 0; } else if (theFacade->get_node_alive(nodeId) == true) @@ -1577,32 +1577,85 @@ MgmtSrvr::status(int nodeId, } int -MgmtSrvr::setEventReportingLevelImpl(int nodeId, +MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg, const EventSubscribeReq& ll) { SignalSender ss(theFacade); - ss.lock(); - - SimpleSignal ssig; - EventSubscribeReq * dst = - CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend()); - ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ, - EventSubscribeReq::SignalLength); - *dst = ll; - - NodeBitmask nodes; + NdbNodeBitmask nodes; + int retries = 30; nodes.clear(); - Uint32 max = (nodeId == 0) ? (nodeId = 1, MAX_NDB_NODES) : nodeId; - for(; (Uint32) nodeId <= max; nodeId++) + while (1) { - if (nodeTypes[nodeId] != NODE_TYPE_DB) - continue; - if (okToSendTo(nodeId, true)) - continue; - if (ss.sendSignal(nodeId, &ssig) == SEND_OK) + Uint32 nodeId, max; + ss.lock(); + SimpleSignal ssig; + EventSubscribeReq * dst = + CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend()); + ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ, + EventSubscribeReq::SignalLength); + *dst = ll; + + if (nodeId_arg == 0) { - nodes.set(nodeId); + // all nodes + nodeId = 1; + max = MAX_NDB_NODES; + } + else + { + // only one node + max = nodeId = nodeId_arg; + } + // first make sure nodes are sendable + for(; nodeId <= max; nodeId++) + { + if (nodeTypes[nodeId] != NODE_TYPE_DB) + continue; + if (okToSendTo(nodeId, true)) + { + if (theFacade->theClusterMgr->getNodeInfo(nodeId).connected == false) + { + // node not connected we can safely skip this one + continue; + } + // api_reg_conf not recevied yet, need to retry + break; + } + } + if (nodeId <= max) + { + if (--retries) + { + ss.unlock(); + NdbSleep_MilliSleep(100); + continue; + } + return SEND_OR_RECEIVE_FAILED; + } + + if (nodeId_arg == 0) + { + // all nodes + nodeId = 1; + max = MAX_NDB_NODES; + } + else + { + // only one node + max = nodeId = nodeId_arg; } + // now send to all sendable nodes nodes + // note, lock is held, so states have not changed + for(; (Uint32) nodeId <= max; nodeId++) + { + if (nodeTypes[nodeId] != NODE_TYPE_DB) + continue; + if (theFacade->theClusterMgr->getNodeInfo(nodeId).connected == false) + continue; // node is not connected, skip + if (ss.sendSignal(nodeId, &ssig) == SEND_OK) + nodes.set(nodeId); + } + break; } if (nodes.isclear()) @@ -1613,6 +1666,7 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId, int error = 0; while (!nodes.isclear()) { + Uint32 nodeId; SimpleSignal *signal = ss.waitFor(); int gsn = signal->readSignalNumber(); nodeId = refToNode(signal->header.theSendersBlockRef); diff --git a/storage/ndb/src/ndbapi/ClusterMgr.cpp b/storage/ndb/src/ndbapi/ClusterMgr.cpp index b162b85d61e..2a794f69ecb 100644 --- a/storage/ndb/src/ndbapi/ClusterMgr.cpp +++ b/storage/ndb/src/ndbapi/ClusterMgr.cpp @@ -313,7 +313,7 @@ ClusterMgr::showState(NodeId nodeId){ ClusterMgr::Node::Node() : m_state(NodeState::SL_NOTHING) { compatible = nfCompleteRep = true; - connected = defined = m_alive = false; + connected = defined = m_alive = m_api_reg_conf = false; m_state.m_connected_nodes.clear(); } @@ -385,6 +385,8 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ node.m_info.m_version); } + node.m_api_reg_conf = true; + node.m_state = apiRegConf->nodeState; if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED || node.m_state.startLevel == NodeState::SL_SINGLEUSER)){ @@ -501,6 +503,7 @@ ClusterMgr::reportDisconnected(NodeId nodeId){ noOfConnectedNodes--; theNodes[nodeId].connected = false; + theNodes[nodeId].m_api_reg_conf = false; theNodes[nodeId].m_state.m_connected_nodes.clear(); reportNodeFailed(nodeId, true); diff --git a/storage/ndb/src/ndbapi/ClusterMgr.hpp b/storage/ndb/src/ndbapi/ClusterMgr.hpp index bb20d447c0c..6e74620dd4f 100644 --- a/storage/ndb/src/ndbapi/ClusterMgr.hpp +++ b/storage/ndb/src/ndbapi/ClusterMgr.hpp @@ -70,6 +70,7 @@ public: bool compatible; // Version is compatible bool nfCompleteRep; // NF Complete Rep has arrived bool m_alive; // Node is alive + bool m_api_reg_conf;// API_REGCONF has arrived NodeInfo m_info; NodeState m_state; diff --git a/storage/ndb/src/ndbapi/SignalSender.hpp b/storage/ndb/src/ndbapi/SignalSender.hpp index ec874e63c52..4cad759a334 100644 --- a/storage/ndb/src/ndbapi/SignalSender.hpp +++ b/storage/ndb/src/ndbapi/SignalSender.hpp @@ -32,7 +32,7 @@ public: Uint32 theData[25]; LinearSectionPtr ptr[3]; - int readSignalNumber() {return header.theVerId_signalNumber; } + int readSignalNumber() const {return header.theVerId_signalNumber; } Uint32 *getDataPtrSend() { return theData; } const Uint32 *getDataPtr() const { return theData; } |