diff options
53 files changed, 1063 insertions, 512 deletions
diff --git a/config/ac-macros/ha_ndbcluster.m4 b/config/ac-macros/ha_ndbcluster.m4 index 509cd868909..2ff598242e4 100644 --- a/config/ac-macros/ha_ndbcluster.m4 +++ b/config/ac-macros/ha_ndbcluster.m4 @@ -140,7 +140,7 @@ AC_DEFUN([MYSQL_CHECK_NDBCLUSTER], [ AC_MSG_RESULT([Using NDB Cluster]) AC_DEFINE([HAVE_NDBCLUSTER_DB], [1], [Using Ndb Cluster DB]) have_ndbcluster="yes" - ndbcluster_includes="-I../ndb/include -I../ndb/include/ndbapi" + ndbcluster_includes="-I../ndb/include -I../ndb/include/ndbapi -I../ndb/include/mgmapi" ndbcluster_libs="\$(top_builddir)/ndb/src/.libs/libndbclient.a" ndbcluster_system_libs="" ndb_mgmclient_libs="\$(top_builddir)/ndb/src/mgmclient/libndbmgmclient.la" diff --git a/extra/perror.c b/extra/perror.c index dedd558e4cf..919088ba42e 100644 --- a/extra/perror.c +++ b/extra/perror.c @@ -25,6 +25,7 @@ #include <my_getopt.h> #ifdef HAVE_NDBCLUSTER_DB #include "../ndb/src/ndbapi/ndberror.c" +#include "../ndb/src/kernel/error/ndbd_exit_codes.c" #endif static my_bool verbose, print_all_codes; @@ -235,8 +236,11 @@ int main(int argc,char *argv[]) #ifdef HAVE_NDBCLUSTER_DB if (ndb_code) { - if (ndb_error_string(code, ndb_string, sizeof(ndb_string)) < 0) - msg= 0; + if ((ndb_error_string(code, ndb_string, sizeof(ndb_string)) < 0) && + (ndbd_exit_string(code, ndb_string, sizeof(ndb_string)) < 0)) + { + msg= 0; + } else msg= ndb_string; } diff --git a/ndb/include/Makefile.am b/ndb/include/Makefile.am index 10f297492e9..240101c2004 100644 --- a/ndb/include/Makefile.am +++ b/ndb/include/Makefile.am @@ -33,7 +33,8 @@ mgmapi/mgmapi.h \ mgmapi/mgmapi_debug.h \ mgmapi/mgmapi_config_parameters.h \ mgmapi/mgmapi_config_parameters_debug.h \ -mgmapi/ndb_logevent.h +mgmapi/ndb_logevent.h \ +mgmapi/ndbd_exit_codes.h noinst_HEADERS = \ ndb_global.h \ diff --git a/ndb/include/kernel/signaldata/EventReport.hpp b/ndb/include/kernel/signaldata/EventReport.hpp index 9822a0539cf..e1cdbcfd753 100644 --- a/ndb/include/kernel/signaldata/EventReport.hpp +++ b/ndb/include/kernel/signaldata/EventReport.hpp @@ -68,6 +68,8 @@ public: 4) Add SentHeartbeat in EventLogger::getText() */ + void setNodeId(Uint32 nodeId); + Uint32 getNodeId() const; void setEventType(Ndb_logevent_type type); Ndb_logevent_type getEventType() const; UintR eventType; // DATA 0 @@ -75,14 +77,26 @@ public: inline void +EventReport::setNodeId(Uint32 nodeId){ + eventType = (nodeId << 16) | (eventType & 0xFFFF); +} + +inline +Uint32 +EventReport::getNodeId() const { + return eventType >> 16; +} + +inline +void EventReport::setEventType(Ndb_logevent_type type){ - eventType = (UintR) type; + eventType = (eventType & 0xFFFF0000) | (((UintR) type) & 0xFFFF); } inline Ndb_logevent_type EventReport::getEventType() const { - return (Ndb_logevent_type)eventType; + return (Ndb_logevent_type)(eventType & 0xFFFF); } #endif diff --git a/ndb/include/kernel/signaldata/FsRef.hpp b/ndb/include/kernel/signaldata/FsRef.hpp index 2f7038de4ec..a0e1dc55dae 100644 --- a/ndb/include/kernel/signaldata/FsRef.hpp +++ b/ndb/include/kernel/signaldata/FsRef.hpp @@ -17,6 +17,7 @@ #ifndef FS_REF_H #define FS_REF_H +#include <ndbd_exit_codes.h> #include "SignalData.hpp" /** @@ -37,17 +38,15 @@ struct FsRef { */ enum NdbfsErrorCodeType { fsErrNone=0, - fsErrHardwareFailed=1, - fsErrUserError=2, - fsErrEnvironmentError=3, - fsErrTemporaryNotAccessible=4, - fsErrNoSpaceLeftOnDevice=5, - fsErrPermissionDenied=6, - fsErrInvalidParameters=7, - fsErrUnknown=8, - fsErrNoMoreResources=9, - fsErrFileDoesNotExist=10, - fsErrReadUnderflow = 11, + fsErrEnvironmentError=NDBD_EXIT_AFS_ENVIRONMENT, + fsErrTemporaryNotAccessible=NDBD_EXIT_AFS_TEMP_NO_ACCESS, + fsErrNoSpaceLeftOnDevice=NDBD_EXIT_AFS_DISK_FULL, + fsErrPermissionDenied=NDBD_EXIT_AFS_PERMISSION_DENIED, + fsErrInvalidParameters=NDBD_EXIT_AFS_INVALID_PARAM, + fsErrUnknown=NDBD_EXIT_AFS_UNKNOWN, + fsErrNoMoreResources=NDBD_EXIT_AFS_NO_MORE_RESOURCES, + fsErrFileDoesNotExist=NDBD_EXIT_AFS_NO_SUCH_FILE, + fsErrReadUnderflow = NDBD_EXIT_AFS_READ_UNDERFLOW, fsErrMax }; /** diff --git a/ndb/include/mgmapi/mgmapi.h b/ndb/include/mgmapi/mgmapi.h index 924d65c2847..36dee7193c7 100644 --- a/ndb/include/mgmapi/mgmapi.h +++ b/ndb/include/mgmapi/mgmapi.h @@ -1022,6 +1022,8 @@ extern "C" { int param, const char ** value); int ndb_mgm_purge_stale_sessions(NdbMgmHandle handle, char **); int ndb_mgm_check_connection(NdbMgmHandle handle); + + int ndb_mgm_report_event(NdbMgmHandle handle, Uint32 *data, Uint32 length); #endif #ifndef DOXYGEN_SHOULD_SKIP_DEPRECATED diff --git a/ndb/include/mgmapi/ndb_logevent.h b/ndb/include/mgmapi/ndb_logevent.h index b69379545fc..6025ff2725c 100644 --- a/ndb/include/mgmapi/ndb_logevent.h +++ b/ndb/include/mgmapi/ndb_logevent.h @@ -76,6 +76,10 @@ extern "C" { /** NDB_MGM_EVENT_CATEGORY_STARTUP */ NDB_LE_NDBStopStarted = 17, /** NDB_MGM_EVENT_CATEGORY_STARTUP */ + NDB_LE_NDBStopCompleted = 53, + /** NDB_MGM_EVENT_CATEGORY_STARTUP */ + NDB_LE_NDBStopForced = 59, + /** NDB_MGM_EVENT_CATEGORY_STARTUP */ NDB_LE_NDBStopAborted = 18, /** NDB_MGM_EVENT_CATEGORY_STARTUP */ NDB_LE_StartREDOLog = 19, @@ -148,9 +152,12 @@ extern "C" { /** NDB_MGM_EVENT_CATEGORY_INFO */ NDB_LE_InfoEvent = 49, + /* 50 used */ + /* 51 used */ + /* SINGLE USER */ NDB_LE_SingleUser = 52, - /* NDB_LE_ UNUSED = 53, */ + /* 53 used */ /** NDB_MGM_EVENT_CATEGORY_BACKUP */ NDB_LE_BackupStarted = 54, @@ -160,6 +167,13 @@ extern "C" { NDB_LE_BackupCompleted = 56, /** NDB_MGM_EVENT_CATEGORY_BACKUP */ NDB_LE_BackupAborted = 57 + + /* 58 used in 5.1 */ + /* 59 used */ + /* 60 unused */ + /* 61 unused */ + /* 62 unused */ + }; /** @@ -389,6 +403,19 @@ extern "C" { } NDBStopStarted; /** Log event specific data for for corresponding NDB_LE_ log event */ struct { + unsigned action; + unsigned signum; + } NDBStopCompleted; + /** Log event specific data for for corresponding NDB_LE_ log event */ + struct { + unsigned action; + unsigned signum; + unsigned error; + unsigned sphase; + unsigned extra; + } NDBStopForced; + /** Log event specific data for for corresponding NDB_LE_ log event */ + struct { } NDBStopAborted; /** Log event specific data for for corresponding NDB_LE_ log event */ struct { diff --git a/ndb/include/mgmapi/ndbd_exit_codes.h b/ndb/include/mgmapi/ndbd_exit_codes.h new file mode 100644 index 00000000000..30bf1c4b3fe --- /dev/null +++ b/ndb/include/mgmapi/ndbd_exit_codes.h @@ -0,0 +1,143 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef NDBD_EXIT_CODES_H +#define NDBD_EXIT_CODES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef DOXYGEN_SHOULD_SKIP_INTERNAL + +/** + * Exit error codes for NDBD + * + * These errorcodes should be used whenever a condition + * is detected where it's necesssary to shutdown NDB. + * + * Example: When another node fails while a NDB node are performing + * a system restart the node should be shutdown. This + * is kind of an error but the cause of the error is known + * and a proper errormessage describing the problem should + * be printed in error.log. It's therefore important to use + * the proper errorcode. + * + */ + +typedef enum +{ + ndbd_exit_st_success = 0, + ndbd_exit_st_unknown = 1, + ndbd_exit_st_permanent = 2, + ndbd_exit_st_temporary = 3, + ndbd_exit_st_temporary_i = 4, + ndbd_exit_st_bug = 5 +} ndbd_exit_status_enum; + +typedef enum +{ + ndbd_exit_cl_none = 0, + ndbd_exit_cl_unknown = 1, + ndbd_exit_cl_internal_error = 2, + ndbd_exit_cl_configuration_error = 3, + ndbd_exit_cl_arbitration_error = 4, + ndbd_exit_cl_restart_error = 5, + ndbd_exit_cl_resource_configuration_error = 6, + ndbd_exit_cl_filesystem_full_error = 7, + ndbd_exit_cl_filesystem_inconsistency_error = 8 +} ndbd_exit_classification_enum; + +typedef ndbd_exit_status_enum ndbd_exit_status; +typedef ndbd_exit_classification_enum ndbd_exit_classification; + +/* Errorcodes before block division was used */ +#define NDBD_EXIT_PRGERR 2301 +#define NDBD_EXIT_NODE_NOT_IN_CONFIG 2302 +#define NDBD_EXIT_SYSTEM_ERROR 2303 +#define NDBD_EXIT_INDEX_NOTINRANGE 2304 +#define NDBD_EXIT_ARBIT_SHUTDOWN 2305 +#define NDBD_EXIT_POINTER_NOTINRANGE 2306 +#define NDBD_EXIT_SR_OTHERNODEFAILED 2308 +#define NDBD_EXIT_NODE_NOT_DEAD 2309 +#define NDBD_EXIT_SR_REDOLOG 2310 +/* +#define NDBD_EXIT_SR_RESTARTCONFLICT 2311 +*/ +#define NDBD_EXIT_NO_MORE_UNDOLOG 2312 +#define NDBD_EXIT_SR_UNDOLOG 2313 +#define NDBD_EXIT_MEMALLOC 2327 +#define NDBD_EXIT_BLOCK_JBUFCONGESTION 2334 +#define NDBD_EXIT_TIME_QUEUE_SHORT 2335 +#define NDBD_EXIT_TIME_QUEUE_LONG 2336 +#define NDBD_EXIT_TIME_QUEUE_DELAY 2337 +#define NDBD_EXIT_TIME_QUEUE_INDEX 2338 +#define NDBD_EXIT_BLOCK_BNR_ZERO 2339 +#define NDBD_EXIT_WRONG_PRIO_LEVEL 2340 +#define NDBD_EXIT_NDBREQUIRE 2341 +#define NDBD_EXIT_ERROR_INSERT 2342 +#define NDBD_EXIT_NDBASSERT 2343 +#define NDBD_EXIT_INVALID_CONFIG 2350 +#define NDBD_EXIT_OUT_OF_LONG_SIGNAL_MEMORY 2351 + +/* VM 6000-> */ +#define NDBD_EXIT_WATCHDOG_TERMINATE 6000 +#define NDBD_EXIT_SIGNAL_LOST 6001 +#define NDBD_EXIT_SIGNAL_LOST_SEND_BUFFER_FULL 6002 +#define NDBD_EXIT_ILLEGAL_SIGNAL 6003 + +/* TC 6200-> */ +/* DIH 6300-> */ +#define NDBD_EXIT_MAX_CRASHED_REPLICAS 6300 +/* LQH 7200-> */ + + +/* Errorcodes for NDB filesystem */ +#define NDBD_EXIT_AFS_NOPATH 2801 +/* +#define NDBD_EXIT_AFS_CHANNALFULL 2802 +#define NDBD_EXIT_AFS_NOMORETHREADS 2803 +*/ +#define NDBD_EXIT_AFS_PARAMETER 2804 +#define NDBD_EXIT_AFS_INVALIDPATH 2805 +#define NDBD_EXIT_AFS_MAXOPEN 2806 +#define NDBD_EXIT_AFS_ALREADY_OPEN 2807 + +#define NDBD_EXIT_AFS_ENVIRONMENT 2808 +#define NDBD_EXIT_AFS_TEMP_NO_ACCESS 2809 +#define NDBD_EXIT_AFS_DISK_FULL 2810 +#define NDBD_EXIT_AFS_PERMISSION_DENIED 2811 +#define NDBD_EXIT_AFS_INVALID_PARAM 2812 +#define NDBD_EXIT_AFS_UNKNOWN 2813 +#define NDBD_EXIT_AFS_NO_MORE_RESOURCES 2814 +#define NDBD_EXIT_AFS_NO_SUCH_FILE 2815 +#define NDBD_EXIT_AFS_READ_UNDERFLOW 2816 + +const char * +ndbd_exit_message(int faultId, ndbd_exit_classification *cl); +const char * +ndbd_exit_classification_message(ndbd_exit_classification classification, + ndbd_exit_status *status); +const char * +ndbd_exit_status_message(ndbd_exit_status status); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* NDBD_EXIT_CODES_H */ diff --git a/ndb/src/common/debugger/EventLogger.cpp b/ndb/src/common/debugger/EventLogger.cpp index 9a1dcb8a3e1..f785cda5215 100644 --- a/ndb/src/common/debugger/EventLogger.cpp +++ b/ndb/src/common/debugger/EventLogger.cpp @@ -25,6 +25,8 @@ #include <NodeState.hpp> #include <version.h> +#include <ndbd_exit_codes.h> + // // PUBLIC // @@ -84,6 +86,57 @@ void getTextNDBStopStarted(QQQQ) { "%s shutdown initiated", (theData[1] == 1 ? "Cluster" : "Node")); } +void getRestartAction(Uint32 action, BaseString &str) +{ + if (action == 0) + return; + str.appfmt(", restarting"); + if (action & 2) + str.appfmt(", no start"); + if (action & 4) + str.appfmt(", initial"); +} +void getTextNDBStopCompleted(QQQQ) { + BaseString action_str(""); + BaseString signum_str(""); + getRestartAction(theData[1], action_str); + if (theData[2]) + signum_str.appfmt(" Initiated by signal %d.", theData[2]); + BaseString::snprintf(m_text, m_text_len, + "Node shutdown completed%s.%s", + action_str.c_str(), + signum_str.c_str()); +} +void getTextNDBStopForced(QQQQ) { + BaseString action_str(""); + BaseString reason_str(""); + BaseString sphase_str(""); + int signum = theData[2]; + int error = theData[3]; + int sphase = theData[4]; + int extra = theData[5]; + getRestartAction(theData[1],action_str); + if (signal) + reason_str.appfmt(" Initiated by signal %d.", signum); + if (error) + { + ndbd_exit_classification cl; + ndbd_exit_status st; + const char *msg = ndbd_exit_message(error, &cl); + const char *cl_msg = ndbd_exit_classification_message(cl, &st); + const char *st_msg = ndbd_exit_status_message(st); + reason_str.appfmt(" Caused by error %d: \'%s(%s). %s\'.", + error, msg, cl_msg, st_msg); + if (extra != 0) + reason_str.appfmt(" (extra info %d)", extra); + } + if (sphase < 255) + sphase_str.appfmt(" Occured during startphase %u.", sphase); + BaseString::snprintf(m_text, m_text_len, + "Forced node shutdown completed%s.%s%s", + action_str.c_str(), sphase_str.c_str(), + reason_str.c_str()); +} void getTextNDBStopAborted(QQQQ) { BaseString::snprintf(m_text, m_text_len, "Node shutdown aborted"); @@ -696,6 +749,8 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = { ROW(CM_REGREF, LogLevel::llStartUp, 8, Logger::LL_INFO ), ROW(FIND_NEIGHBOURS, LogLevel::llStartUp, 8, Logger::LL_INFO ), ROW(NDBStopStarted, LogLevel::llStartUp, 1, Logger::LL_INFO ), + ROW(NDBStopCompleted, LogLevel::llStartUp, 1, Logger::LL_INFO ), + ROW(NDBStopForced, LogLevel::llStartUp, 1, Logger::LL_ALERT ), ROW(NDBStopAborted, LogLevel::llStartUp, 1, Logger::LL_INFO ), ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), diff --git a/ndb/src/common/debugger/signaldata/FsRef.cpp b/ndb/src/common/debugger/signaldata/FsRef.cpp index ccf3d6da9c8..ff659208d20 100644 --- a/ndb/src/common/debugger/signaldata/FsRef.cpp +++ b/ndb/src/common/debugger/signaldata/FsRef.cpp @@ -30,42 +30,13 @@ printFSREF(FILE * output, const Uint32 * theData, sig->userPointer); fprintf(output, " ErrorCode: %d, ", sig->errorCode); + ndbd_exit_classification cl; switch (sig->getErrorCode(sig->errorCode)){ case FsRef::fsErrNone: fprintf(output, "No error"); break; - case FsRef::fsErrHardwareFailed: - fprintf(output, "Hardware failure!"); - break; - case FsRef::fsErrUserError: - fprintf(output, "User error!"); - break; - case FsRef::fsErrEnvironmentError: - fprintf(output, "Environment error!"); - break; - case FsRef::fsErrTemporaryNotAccessible: - fprintf(output, "Temporary not accesible!"); - break; - case FsRef::fsErrNoSpaceLeftOnDevice: - fprintf(output, "No space left on device!"); - break; - case FsRef::fsErrPermissionDenied: - fprintf(output, "Permission denied!"); - break; - case FsRef::fsErrInvalidParameters: - fprintf(output, "Invalid parameters!"); - break; - case FsRef::fsErrNoMoreResources: - fprintf(output, "No more resources!"); - break; - case FsRef::fsErrFileDoesNotExist: - fprintf(output, "File does not exist!"); - break; - - case FsRef::fsErrUnknown: default: - fprintf(output, "Unknown!"); - ret = false; + fprintf(output, ndbd_exit_message(sig->getErrorCode(sig->errorCode), &cl)); break; } fprintf(output, "\n"); diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index 6f6aee6a7f7..9d6c692379a 100644 --- a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -189,6 +189,12 @@ void Cmvmi::execEVENT_REP(Signal* signal) //----------------------------------------------------------------------- EventReport * const eventReport = (EventReport *)&signal->theData[0]; Ndb_logevent_type eventType = eventReport->getEventType(); + Uint32 nodeId= eventReport->getNodeId(); + if (nodeId == 0) + { + nodeId= refToNode(signal->getSendersBlockRef()); + eventReport->setNodeId(nodeId); + } jamEntry(); diff --git a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index f4b084c42fb..df8627ae136 100644 --- a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -56,7 +56,7 @@ Dbacc::remainingUndoPages(){ if (Remaining <= 0){ // No more undolog, crash node progError(__LINE__, - ERR_NO_MORE_UNDOLOG, + NDBD_EXIT_NO_MORE_UNDOLOG, "There are more than 1Mbyte undolog writes outstanding"); } return Remaining; @@ -5303,8 +5303,7 @@ void Dbacc::execDEBUG_SIG(Signal* signal) jamEntry(); expPageptr.i = signal->theData[0]; - progError(__LINE__, - ERR_SR_UNDOLOG); + progError(__LINE__, NDBD_EXIT_SR_UNDOLOG); return; }//Dbacc::execDEBUG_SIG() diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 72051777959..92959eb5552 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -1051,17 +1051,23 @@ void Dbdih::execREAD_CONFIG_REQ(Signal* signal) const ndb_mgm_configuration_iterator * p = theConfiguration.getOwnConfigIterator(); - ndbrequire(p != 0); - - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT, - &capiConnectFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_CONNECT,&cconnectFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT, - &cfragstoreFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS, - &creplicaFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize)) - cfileFileSize = (2 * ctabFileSize) + 2; + ndbrequireErr(p != 0, NDBD_EXIT_INVALID_CONFIG); + + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT, + &capiConnectFileSize), + NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_CONNECT, + &cconnectFileSize), + NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT, + &cfragstoreFileSize), + NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS, + &creplicaFileSize), + NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize), + NDBD_EXIT_INVALID_CONFIG); + cfileFileSize = (2 * ctabFileSize) + 2; initRecords(); initialiseRecordsLab(signal, 0, ref, senderData); return; @@ -1466,7 +1472,7 @@ void Dbdih::execREAD_NODESCONF(Signal* signal) " Initial start needs to be performed " " when changing no of storage nodes (node %d)", i); progError(__LINE__, - ERR_INVALID_CONFIG, + NDBD_EXIT_INVALID_CONFIG, buf); } } @@ -3529,7 +3535,7 @@ void Dbdih::selectMasterCandidateAndSend(Signal* signal) " when changing no of replicas (%d != %d)", node_groups[nodePtr.i], cnoReplicas); progError(__LINE__, - ERR_INVALID_CONFIG, + NDBD_EXIT_INVALID_CONFIG, buf); } } @@ -3812,7 +3818,7 @@ void Dbdih::execNODE_FAILREP(Signal* signal) if(getNodeState().getNodeRestartInProgress()){ jam(); progError(__LINE__, - ERR_SYSTEM_ERROR, + NDBD_EXIT_SYSTEM_ERROR, "Unhandle master failure during node restart"); } } @@ -8220,7 +8226,7 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){ /* --------------------------------------------------------------- */ /* THE NODE IS ALIVE AND KICKING AND ACTIVE, LET'S USE IT. */ /* --------------------------------------------------------------- */ - arrGuard(noCrashedReplicas, 8); + arrGuardErr(noCrashedReplicas, 8, NDBD_EXIT_MAX_CRASHED_REPLICAS); Uint32 lastGci = replicaPtr.p->replicaLastGci[noCrashedReplicas]; if(lastGci >= newestRestorableGCI){ jam(); @@ -8700,7 +8706,7 @@ void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId) "table: %d fragment: %d gci: %d", tableId, fragId, SYSFILE->newestRestorableGCI); progError(__LINE__, - ERR_SYSTEM_ERROR, + NDBD_EXIT_SYSTEM_ERROR, buf); ndbrequire(false); return; @@ -10582,7 +10588,7 @@ void Dbdih::checkEscalation() jam(); if (TnodeGroup[i] == ZFALSE) { jam(); - progError(__LINE__, ERR_SYSTEM_ERROR, "Lost node group"); + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, "Lost node group"); }//if }//for }//Dbdih::checkEscalation() diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 365c28f1229..b9efd54b248 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -14910,7 +14910,7 @@ void Dblqh::execDEBUG_SIG(Signal* signal) signal->theData[2], signal->theData[3], signal->theData[4], signal->theData[5], signal->theData[6], signal->theData[7]); - progError(__LINE__, ERR_SR_REDOLOG, buf); + progError(__LINE__, NDBD_EXIT_SR_REDOLOG, buf); return; }//Dblqh::execDEBUG_SIG() @@ -15800,7 +15800,7 @@ void Dblqh::buildLinkedLogPageList(Signal* signal) // Uint32 checkSum = bllLogPagePtr.p->logPageWord[ZPOS_CHECKSUM]; // if (checkSum != calcCheckSum) { // ndbout << "Redolog: Checksum failure." << endl; -// progError(__LINE__, ERR_NDBREQUIRE, "Redolog: Checksum failure."); +// progError(__LINE__, NDBD_EXIT_NDBREQUIRE, "Redolog: Checksum failure."); // } // #endif @@ -18519,7 +18519,7 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) if(arg== 2305) { - progError(__LINE__, ERR_SYSTEM_ERROR, + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, "Shutting down node due to failed handling of GCP_SAVEREQ"); } diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index e50e6bd242b..e4126031a83 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -49,6 +49,10 @@ #include <NdbOut.hpp> #include <NdbTick.h> +// used during shutdown for reporting current startphase +// accessed from Emulator.cpp, NdbShutdown() +Uint32 g_currentStartPhase; + /** * ALL_BLOCKS Used during start phases and while changing node state * @@ -117,7 +121,7 @@ void Ndbcntr::execCONTINUEB(Signal* signal) else tmp.appfmt(" %d", to_3); - progError(__LINE__, ERR_SYSTEM_ERROR, tmp.c_str()); + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str()); } signal->theData[0] = ZSTARTUP; @@ -192,7 +196,7 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal) } progError(__LINE__, - ERR_SYSTEM_ERROR, + NDBD_EXIT_SYSTEM_ERROR, buf); return; }//Ndbcntr::execSYSTEM_ERROR() @@ -1402,21 +1406,21 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) if(tMasterFailed){ progError(__LINE__, - ERR_SR_OTHERNODEFAILED, + NDBD_EXIT_SR_OTHERNODEFAILED, "Unhandled node failure during restart"); } if(tStartConf && tStarting){ // One of other starting nodes has crashed... progError(__LINE__, - ERR_SR_OTHERNODEFAILED, + NDBD_EXIT_SR_OTHERNODEFAILED, "Unhandled node failure of starting node during restart"); } if(tStartConf && tStarted){ // One of other started nodes has crashed... progError(__LINE__, - ERR_SR_OTHERNODEFAILED, + NDBD_EXIT_SR_OTHERNODEFAILED, "Unhandled node failure of started node during restart"); } @@ -2513,11 +2517,12 @@ void Ndbcntr::Missra::execSTTORRY(Signal* signal){ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){ - for(; currentStartPhase < 255 ; currentStartPhase++){ + for(; currentStartPhase < 255 ; + currentStartPhase++, g_currentStartPhase = currentStartPhase){ jam(); const Uint32 start = currentBlockIndex; - + if (currentStartPhase == ZSTART_PHASE_6) { // Ndbd has passed the critical startphases. diff --git a/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp b/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp index 45073b63a5d..1457dd31abb 100644 --- a/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp +++ b/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp @@ -18,7 +18,6 @@ #include <my_sys.h> #include <my_pthread.h> -#include <Error.hpp> #include "AsyncFile.hpp" #include <ErrorHandlingMacros.hpp> diff --git a/ndb/src/kernel/blocks/ndbfs/Filename.cpp b/ndb/src/kernel/blocks/ndbfs/Filename.cpp index 15158ec19ef..3709751cf8e 100644 --- a/ndb/src/kernel/blocks/ndbfs/Filename.cpp +++ b/ndb/src/kernel/blocks/ndbfs/Filename.cpp @@ -20,7 +20,6 @@ #include "Filename.hpp" #include "ErrorHandlingMacros.hpp" -#include "Error.hpp" #include "RefConvert.hpp" #include "DebuggerNames.hpp" @@ -52,7 +51,7 @@ Filename::init(Uint32 nodeid, DBUG_ENTER("Filename::init"); if (pFileSystemPath == NULL) { - ERROR_SET(fatal, AFS_ERROR_NOPATH, ""," Filename::init()"); + ERROR_SET(fatal, NDBD_EXIT_AFS_NOPATH, ""," Filename::init()"); return; } @@ -109,7 +108,7 @@ Filename::set(BlockReference blockReference, { const char* blockName = getBlockName( refToBlock(blockReference) ); if (blockName == NULL){ - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","No Block Name"); + ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","No Block Name"); return; } BaseString::snprintf(buf, sizeof(buf), "%s%s", blockName, DIR_SEPARATOR); @@ -165,7 +164,7 @@ Filename::set(BlockReference blockReference, const Uint32 diskNo = FsOpenReq::v1_getDisk(filenumber); if(diskNo == 0xFF){ - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","Invalid disk specification"); + ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","Invalid disk specification"); } BaseString::snprintf(buf, sizeof(buf), "D%d%s", diskNo, DIR_SEPARATOR); @@ -174,10 +173,10 @@ Filename::set(BlockReference blockReference, } break; default: - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","Wrong version"); + ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","Wrong version"); } if (type >= noOfExtensions){ - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","File Type doesn't exist"); + ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","File Type doesn't exist"); return; } strcat(theName, fileExtension[type]); diff --git a/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp b/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp index 9037bbad765..f46cc66fe16 100644 --- a/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp +++ b/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp @@ -70,7 +70,6 @@ #else #include "ErrorHandlingMacros.hpp" -#include "Error.hpp" #include "CircularIndex.hpp" #include "NdbMutex.h" #include "NdbCondition.h" diff --git a/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp b/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp index d6b19c8f872..784a46fe9c9 100644 --- a/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp +++ b/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp @@ -19,7 +19,6 @@ #include "Ndbfs.hpp" #include "AsyncFile.hpp" #include "Filename.hpp" -#include "Error.hpp" #include <signaldata/FsOpenReq.hpp> #include <signaldata/FsCloseReq.hpp> @@ -557,7 +556,7 @@ Ndbfs::createAsyncFile(){ AsyncFile* file = theFiles[i]; ndbout_c("%2d (0x%x): %s", i, file, file->isOpen()?"OPEN":"CLOSED"); } - ERROR_SET(fatal, AFS_ERROR_MAXOPEN,""," Ndbfs::createAsyncFile"); + ERROR_SET(fatal, NDBD_EXIT_AFS_MAXOPEN,""," Ndbfs::createAsyncFile"); } AsyncFile* file = new AsyncFile; diff --git a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp index 0fee687f1bc..eacda6ec77d 100644 --- a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp +++ b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp @@ -88,7 +88,7 @@ inline bool OpenFiles::insert(AsyncFile* file, Uint16 id){ names.assfmt("open: >%s< existing: >%s<", file->theFileName.c_str(), m_files[i].m_file->theFileName.c_str()); - ERROR_SET(fatal, AFS_ERROR_ALLREADY_OPEN, names.c_str(), + ERROR_SET(fatal, NDBD_EXIT_AFS_ALREADY_OPEN, names.c_str(), "OpenFiles::insert()"); } } diff --git a/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp b/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp index d093089acfc..3e79fec0f52 100644 --- a/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp +++ b/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp @@ -20,7 +20,6 @@ #include "Ndbfs.hpp" #include "AsyncFile.hpp" #include "Filename.hpp" -#include "Error.hpp" #include <signaldata/FsOpenReq.hpp> #include <signaldata/FsCloseReq.hpp> diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 0f736c54555..e0eb40e5528 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -700,11 +700,11 @@ void Qmgr::execCM_REGREF(Signal* signal) break; case CmRegRef::ZNOT_IN_CFG: jam(); - progError(__LINE__, ERR_NODE_NOT_IN_CONFIG); + progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG); break; case CmRegRef::ZNOT_DEAD: jam(); - progError(__LINE__, ERR_NODE_NOT_DEAD); + progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD); break; case CmRegRef::ZELECTION: jam(); @@ -2680,7 +2680,7 @@ void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line, "Node was shutdown during startup because node %d failed", failedNodeId); - progError(line, ERR_SR_OTHERNODEFAILED, buf); + progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf); } @@ -3786,7 +3786,8 @@ Qmgr::stateArbitCrash(Signal* signal) if (! (arbitRec.getTimediff() > getArbitTimeout())) return; #endif - progError(__LINE__, ERR_ARBIT_SHUTDOWN, "Arbitrator decided to shutdown this node"); + progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN, + "Arbitrator decided to shutdown this node"); } /** diff --git a/ndb/src/kernel/error/Error.hpp b/ndb/src/kernel/error/Error.hpp deleted file mode 100644 index e19d6782793..00000000000 --- a/ndb/src/kernel/error/Error.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#ifndef ERROR_H -#define ERROR_H - -/** - * Errorcodes for NDB - * - * These errorcodes should be used whenever a condition - * is detected where it's necesssary to shutdown NDB. - * - * Example: When another node fails while a NDB node are performing - * a system restart the node should be shutdown. This - * is kind of an error but the cause of the error is known - * and a proper errormessage describing the problem should - * be printed in error.log. It's therefore important to use - * the proper errorcode. - * - * TODO: In the future the errorcodes should be classified - * - */ - -enum ErrorCategory -{ - warning, - ecError, - fatal, - assert -}; - -const int ERR_BASE = 1000; - -// Errorcodes for all blocks except filseystem -const int ERR_ERR_BASE = ERR_BASE + 1300; -const int ERR_ERROR_PRGERR = ERR_ERR_BASE+1; -const int ERR_NODE_NOT_IN_CONFIG = ERR_ERR_BASE+2; -const int ERR_SYSTEM_ERROR = ERR_ERR_BASE+3; -const int ERR_INDEX_NOTINRANGE = ERR_ERR_BASE+4; -const int ERR_ARBIT_SHUTDOWN = ERR_ERR_BASE+5; -const int ERR_POINTER_NOTINRANGE = ERR_ERR_BASE+6; -const int ERR_PROGRAMERROR = ERR_ERR_BASE+7; -const int ERR_SR_OTHERNODEFAILED = ERR_ERR_BASE+8; -const int ERR_NODE_NOT_DEAD = ERR_ERR_BASE+9; -const int ERR_SR_REDOLOG = ERR_ERR_BASE+10; -const int ERR_SR_RESTARTCONFLICT = ERR_ERR_BASE+11; -const int ERR_NO_MORE_UNDOLOG = ERR_ERR_BASE+12; -const int ERR_SR_UNDOLOG = ERR_ERR_BASE+13; -const int ERR_MEMALLOC = ERR_ERR_BASE+27; -const int BLOCK_ERROR_JBUFCONGESTION = ERR_ERR_BASE+34; -const int ERROR_TIME_QUEUE_SHORT = ERR_ERR_BASE+35; -const int ERROR_TIME_QUEUE_LONG = ERR_ERR_BASE+36; -const int ERROR_TIME_QUEUE_DELAY = ERR_ERR_BASE+37; -const int ERROR_TIME_QUEUE_INDEX = ERR_ERR_BASE+38; -const int BLOCK_ERROR_BNR_ZERO = ERR_ERR_BASE+39; -const int ERROR_WRONG_PRIO_LEVEL = ERR_ERR_BASE+40; -const int ERR_NDBREQUIRE = ERR_ERR_BASE+41; -const int ERR_ERROR_INSERT = ERR_ERR_BASE+42; -const int ERR_INVALID_CONFIG = ERR_ERR_BASE+50; -const int ERR_OUT_OF_LONG_SIGNAL_MEMORY = ERR_ERR_BASE+51; - -// Errorcodes for NDB filesystem -const int AFS_ERR_BASE = ERR_BASE + 1800; -const int AFS_ERROR_NOPATH = AFS_ERR_BASE+1; -const int AFS_ERROR_CHANNALFULL = AFS_ERR_BASE+2; -const int AFS_ERROR_NOMORETHREADS = AFS_ERR_BASE+3; -const int AFS_ERROR_PARAMETER = AFS_ERR_BASE+4; -const int AFS_ERROR_INVALIDPATH = AFS_ERR_BASE+5; -const int AFS_ERROR_MAXOPEN = AFS_ERR_BASE+6; -const int AFS_ERROR_ALLREADY_OPEN = AFS_ERR_BASE+7; - -#endif // ERROR_H diff --git a/ndb/src/kernel/error/ErrorHandlingMacros.hpp b/ndb/src/kernel/error/ErrorHandlingMacros.hpp index d8bb7ff759b..8c3454b1ba1 100644 --- a/ndb/src/kernel/error/ErrorHandlingMacros.hpp +++ b/ndb/src/kernel/error/ErrorHandlingMacros.hpp @@ -17,22 +17,27 @@ #ifndef ERRORHANDLINGMACROS_H #define ERRORHANDLINGMACROS_H +#include <ndbd_exit_codes.h> #include "ErrorReporter.hpp" -#include "Error.hpp" extern const char programName[]; -#define ERROR_SET_SIGNAL(messageCategory, messageID, problemData, objectRef) \ - ErrorReporter::handleError(messageCategory, messageID, problemData, objectRef, NST_ErrorHandlerSignal) -#define ERROR_SET(messageCategory, messageID, problemData, objectRef) \ - ErrorReporter::handleError(messageCategory, messageID, problemData, objectRef) +enum NotUsed +{ + warning, + ecError, + fatal, + assert +}; + +#define ERROR_SET_SIGNAL(not_used, messageID, problemData, objectRef) \ + ErrorReporter::handleError(messageID, problemData, objectRef, NST_ErrorHandlerSignal) +#define ERROR_SET(not_used, messageID, problemData, objectRef) \ + ErrorReporter::handleError(messageID, problemData, objectRef) // Description: // Call ErrorHandler with the supplied arguments. The // ErrorHandler decides how to report the error. // Parameters: - // messageCategory IN A hint to the error handler how the - // error should be reported. Can be - // error, fatal (or warning, use WARNING_SET instead). // messageID IN Code identifying the error. If less // than 1000 a unix error is assumed. If // greater than 1000 the code is treated diff --git a/ndb/src/kernel/error/ErrorMessages.cpp b/ndb/src/kernel/error/ErrorMessages.cpp deleted file mode 100644 index 059aa4af61c..00000000000 --- a/ndb/src/kernel/error/ErrorMessages.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include "ErrorMessages.hpp" - -struct ErrStruct { - int fauldId; - const char* text; -}; - -const ErrStruct errArray[] = { - - {2301, "Assertion, probably a programming error"}, - {2302, "Own Node Id not a NDB node, configuration error"}, - {2303, "System error"}, - {2304, "Index too large"}, - {2305, "Arbitrator shutdown"}, - {2306, "Pointer too large"}, - {2307, "Internal program error"}, - {2308, "Node failed during system restart"}, - {2309, "Node state conflict"}, - {2310, "Error while reading the REDO log"}, - {2311, "Conflict when selecting restart type"}, - {2312, "No more free UNDO log"}, - {2313, "Error while reading the datapages and UNDO log"}, - {2327, "Memory allocation failure"}, - {2334, "Job buffer congestion"}, - {2335, "Error in short time queue"}, - {2336, "Error in long time queue"}, - {2337, "Error in time queue, too long delay"}, - {2338, "Time queue index out of range"}, - {2339, "Send signal error"}, - {2340, "Wrong prio level when sending signal"}, - {2341, "Internal program error (failed ndbrequire)"}, - {2342, "Error insert executed" }, - {2350, "Invalid Configuration fetched from Management Server" }, - - // Ndbfs error messages - {2801, "No file system path"}, - {2802, "Channel is full"}, - {2803, "No more threads"}, - {2804, "Bad parameter"}, - {2805, "Illegal file system path"}, - {2806, "Max number of open files exceeded"}, - {2807, "File has already been opened"}, - - // Sentinel - {0, "No message slogan found"} - -}; - -const unsigned short NO_OF_ERROR_MESSAGES = sizeof(errArray)/sizeof(ErrStruct); - -const char* lookupErrorMessage(int faultId) -{ - int i = 0; - while (errArray[i].fauldId != faultId && errArray[i].fauldId != 0) - i++; - return errArray[i].text; -} - - diff --git a/ndb/src/kernel/error/ErrorMessages.hpp b/ndb/src/kernel/error/ErrorMessages.hpp deleted file mode 100644 index 38c8eec636b..00000000000 --- a/ndb/src/kernel/error/ErrorMessages.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#ifndef ERROR_MESSAGES_H -#define ERROR_MESSAGES_H - -const char* lookupErrorMessage(int faultId); - -#endif diff --git a/ndb/src/kernel/error/ErrorReporter.cpp b/ndb/src/kernel/error/ErrorReporter.cpp index 25409db48a8..248807db13d 100644 --- a/ndb/src/kernel/error/ErrorReporter.cpp +++ b/ndb/src/kernel/error/ErrorReporter.cpp @@ -17,9 +17,8 @@ #include <ndb_global.h> -#include "Error.hpp" +#include <ndbd_exit_codes.h> #include "ErrorReporter.hpp" -#include "ErrorMessages.hpp" #include <FastScheduler.hpp> #include <DebuggerNames.hpp> @@ -29,17 +28,9 @@ #include <NdbAutoPtr.hpp> -#define MESSAGE_LENGTH 400 +#define MESSAGE_LENGTH 500 -const char* errorType[] = { - "warning", - "error", - "fatal", - "assert" -}; - - -static int WriteMessage(ErrorCategory thrdType, int thrdMessageID, +static int WriteMessage(int thrdMessageID, const char* thrdProblemData, const char* thrdObjRef, Uint32 thrdTheEmulatedJamIndex, @@ -116,24 +107,35 @@ ErrorReporter::get_trace_no(){ void -ErrorReporter::formatMessage(ErrorCategory type, - int faultID, +ErrorReporter::formatMessage(int faultID, const char* problemData, const char* objRef, const char* theNameOfTheTraceFile, char* messptr){ int processId; - + ndbd_exit_classification cl; + ndbd_exit_status st; + const char *exit_msg = ndbd_exit_message(faultID, &cl); + const char *exit_cl_msg = ndbd_exit_classification_message(cl, &st); + const char *exit_st_msg = ndbd_exit_status_message(st); + processId = NdbHost_GetProcessId(); BaseString::snprintf(messptr, MESSAGE_LENGTH, - "Date/Time: %s\nType of error: %s\n" - "Message: %s\nFault ID: %d\nProblem data: %s" - "\nObject of reference: %s\nProgramName: %s\n" - "ProcessID: %d\nTraceFile: %s\n%s\n***EOM***\n", + "Time: %s\n" + "Status: %s\n" + "Message: %s (%s)\n" + "Error: %d\n" + "Error data: %s\n" + "Error object: %s\n" + "Program: %s\n" + "Pid: %d\n" + "Trace: %s\n" + "Version: %s\n" + "***EOM***\n", formatTimeStampString() , - errorType[type], - lookupErrorMessage(faultID), + exit_st_msg, + exit_msg, exit_cl_msg, faultID, (problemData == NULL) ? "" : problemData, objRef, @@ -160,6 +162,8 @@ ErrorReporter::setErrorHandlerShutdownType(NdbShutdownType nst) s_errorHandlerShutdownType = nst; } +void childReportError(int error); + void ErrorReporter::handleAssert(const char* message, const char* file, int line) { @@ -175,38 +179,26 @@ ErrorReporter::handleAssert(const char* message, const char* file, int line) BaseString::snprintf(refMessage, 100, "%s line: %d (block: %s)", file, line, blockName); #endif - WriteMessage(assert, ERR_ERROR_PRGERR, message, refMessage, + WriteMessage(NDBD_EXIT_PRGERR, message, refMessage, theEmulatedJamIndex, theEmulatedJam); - NdbShutdown(s_errorHandlerShutdownType); -} + childReportError(NDBD_EXIT_PRGERR); -void -ErrorReporter::handleThreadAssert(const char* message, - const char* file, - int line) -{ - char refMessage[100]; - BaseString::snprintf(refMessage, 100, "file: %s lineNo: %d - %s", - file, line, message); - NdbShutdown(s_errorHandlerShutdownType); -}//ErrorReporter::handleThreadAssert() - +} void -ErrorReporter::handleError(ErrorCategory type, int messageID, +ErrorReporter::handleError(int messageID, const char* problemData, const char* objRef, NdbShutdownType nst) { - type = ecError; - // The value for type is not always set correctly in the calling function. - // So, to correct this, we set it set it to the value corresponding to - // the function that is called. - WriteMessage(type, messageID, problemData, + WriteMessage(messageID, problemData, objRef, theEmulatedJamIndex, theEmulatedJam); - if(messageID == ERR_ERROR_INSERT){ + + childReportError(messageID); + + if(messageID == NDBD_EXIT_ERROR_INSERT){ NdbShutdown(NST_ErrorInsert); } else { if (nst == NST_ErrorHandler) @@ -216,7 +208,7 @@ ErrorReporter::handleError(ErrorCategory type, int messageID, } int -WriteMessage(ErrorCategory thrdType, int thrdMessageID, +WriteMessage(int thrdMessageID, const char* thrdProblemData, const char* thrdObjRef, Uint32 thrdTheEmulatedJamIndex, Uint8 thrdTheEmulatedJam[]){ @@ -257,7 +249,7 @@ WriteMessage(ErrorCategory thrdType, int thrdMessageID, " \n\n\n"); // ...and write the error-message... - ErrorReporter::formatMessage(thrdType, thrdMessageID, + ErrorReporter::formatMessage(thrdMessageID, thrdProblemData, thrdObjRef, theTraceFileName, theMessage); fprintf(stream, "%s", theMessage); @@ -284,7 +276,7 @@ WriteMessage(ErrorCategory thrdType, int thrdMessageID, fseek(stream, offset, SEEK_SET); // ...and write the error-message there... - ErrorReporter::formatMessage(thrdType, thrdMessageID, + ErrorReporter::formatMessage(thrdMessageID, thrdProblemData, thrdObjRef, theTraceFileName, theMessage); fprintf(stream, "%s", theMessage); diff --git a/ndb/src/kernel/error/ErrorReporter.hpp b/ndb/src/kernel/error/ErrorReporter.hpp index c5533df46f4..3f5e74f16b1 100644 --- a/ndb/src/kernel/error/ErrorReporter.hpp +++ b/ndb/src/kernel/error/ErrorReporter.hpp @@ -20,7 +20,6 @@ #include <ndb_global.h> #include "TimeModule.hpp" -#include "Error.hpp" #include <Emulator.hpp> class ErrorReporter @@ -31,23 +30,16 @@ public: const char* file, int line); - static void handleThreadAssert(const char* message, - const char* file, - int line); - - static void handleError(ErrorCategory type, - int faultID, + static void handleError(int faultID, const char* problemData, const char* objRef, enum NdbShutdownType = NST_ErrorHandler); - static void handleWarning(ErrorCategory type, - int faultID, + static void handleWarning(int faultID, const char* problemData, const char* objRef); - static void formatMessage(ErrorCategory type, - int faultID, + static void formatMessage(int faultID, const char* problemData, const char* objRef, const char* theNameOfTheTraceFile, diff --git a/ndb/src/kernel/error/Makefile.am b/ndb/src/kernel/error/Makefile.am index 54f3de2d76d..c58cdf80940 100644 --- a/ndb/src/kernel/error/Makefile.am +++ b/ndb/src/kernel/error/Makefile.am @@ -2,7 +2,7 @@ noinst_LIBRARIES = liberror.a liberror_a_SOURCES = TimeModule.cpp \ ErrorReporter.cpp \ - ErrorMessages.cpp + ndbd_exit_codes.c include $(top_srcdir)/ndb/config/common.mk.am include $(top_srcdir)/ndb/config/type_kernel.mk.am diff --git a/ndb/src/kernel/error/ndbd_exit_codes.c b/ndb/src/kernel/error/ndbd_exit_codes.c new file mode 100644 index 00000000000..355cb756459 --- /dev/null +++ b/ndb/src/kernel/error/ndbd_exit_codes.c @@ -0,0 +1,210 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <ndbd_exit_codes.h> + +typedef struct ErrStruct { + int faultId; + ndbd_exit_classification classification; + const char* text; +} ErrStruct; + +/** + * Shorter names in table below + */ + +#define XST_S ndbd_exit_st_success +#define XST_U ndbd_exit_st_unknown +#define XST_P ndbd_exit_st_permanent +#define XST_R ndbd_exit_st_temporary +#define XST_I ndbd_exit_st_temporary_i +#define XST_B ndbd_exit_st_bug + +#define XNE ndbd_exit_cl_none +#define XUE ndbd_exit_cl_unknown +#define XIE ndbd_exit_cl_internal_error +#define XCE ndbd_exit_cl_configuration_error +#define XAE ndbd_exit_cl_arbitration_error +#define XRE ndbd_exit_cl_restart_error +#define XCR ndbd_exit_cl_resource_configuration_error +#define XFF ndbd_exit_cl_filesystem_full_error +#define XFI ndbd_exit_cl_filesystem_inconsistency_error + +static const ErrStruct errArray[] = +{ + {NDBD_EXIT_PRGERR, XIE, "Assertion"}, + {NDBD_EXIT_NODE_NOT_IN_CONFIG, XCE, "Own Node Id not a NDB node"}, + {NDBD_EXIT_SYSTEM_ERROR, XIE, "System error"}, + {NDBD_EXIT_INDEX_NOTINRANGE, XIE, "Index too large"}, + {NDBD_EXIT_ARBIT_SHUTDOWN, XAE, "Arbitrator shutdown"}, + {NDBD_EXIT_POINTER_NOTINRANGE, XIE, "Pointer too large"}, + {NDBD_EXIT_SR_OTHERNODEFAILED, XRE, "Node failed during system restart"}, + {NDBD_EXIT_NODE_NOT_DEAD, XRE, "Node state conflict"}, + {NDBD_EXIT_SR_REDOLOG, XFI, "Error while reading the REDO log"}, + {2311, XIE, "Conflict when selecting restart type"}, + {NDBD_EXIT_NO_MORE_UNDOLOG, XCR, "No more free UNDO log"}, + {NDBD_EXIT_SR_UNDOLOG, XFI, "Error while reading the datapages and UNDO log"}, + {NDBD_EXIT_MEMALLOC, XCE, "Memory allocation failure"}, + {NDBD_EXIT_BLOCK_JBUFCONGESTION, XIE, "Job buffer congestion"}, + {NDBD_EXIT_TIME_QUEUE_SHORT, XIE, "Error in short time queue"}, + {NDBD_EXIT_TIME_QUEUE_LONG, XIE, "Error in long time queue"}, + {NDBD_EXIT_TIME_QUEUE_DELAY, XIE, "Error in time queue, too long delay"}, + {NDBD_EXIT_TIME_QUEUE_INDEX, XIE, "Time queue index out of range"}, + {NDBD_EXIT_BLOCK_BNR_ZERO, XIE, "Send signal error"}, + {NDBD_EXIT_WRONG_PRIO_LEVEL, XIE, "Wrong prio level when sending signal"}, + {NDBD_EXIT_NDBREQUIRE, XIE, "Internal program error (failed ndbrequire)"}, + {NDBD_EXIT_NDBASSERT, XIE, "Internal program error (failed ndbassert)"}, + {NDBD_EXIT_ERROR_INSERT, XNE, "Error insert executed" }, + {NDBD_EXIT_INVALID_CONFIG, XCE, + "Invalid Configuration fetched from Management Server" }, + + // VM + {NDBD_EXIT_OUT_OF_LONG_SIGNAL_MEMORY, XCE, + "Signal lost, out of long signal memory"}, + {NDBD_EXIT_WATCHDOG_TERMINATE, XIE, "WatchDog terminate"}, + {NDBD_EXIT_SIGNAL_LOST_SEND_BUFFER_FULL, XCE, + "Signal lost, out of send buffer memory"}, + {NDBD_EXIT_SIGNAL_LOST, XIE, "Signal lost (unknown reason)"}, + {NDBD_EXIT_ILLEGAL_SIGNAL, XCE, "Illegal signal (version mismatch?)"}, + + // DIH + {NDBD_EXIT_MAX_CRASHED_REPLICAS, XFI, "To many crasched replicas"}, + + // Ndbfs error messages + {NDBD_EXIT_AFS_NOPATH, XCE, "No file system path"}, + {2802, XIE, "Channel is full"}, + {2803, XIE, "No more threads"}, + {NDBD_EXIT_AFS_PARAMETER, XCE, "Bad parameter"}, + {NDBD_EXIT_AFS_INVALIDPATH, XCE, "Illegal file system path"}, + {NDBD_EXIT_AFS_MAXOPEN, XCE, "Max number of open files exceeded"}, + {NDBD_EXIT_AFS_ALREADY_OPEN, XIE, "File has already been opened"}, + + {NDBD_EXIT_AFS_ENVIRONMENT , XIE, "Environment error using file"}, + {NDBD_EXIT_AFS_TEMP_NO_ACCESS , XIE, "Temporary on access to file"}, + {NDBD_EXIT_AFS_DISK_FULL , XFF, "Filesystem full"}, + {NDBD_EXIT_AFS_PERMISSION_DENIED , XCE, "Permission denied for file"}, + {NDBD_EXIT_AFS_INVALID_PARAM , XCE, "Invalid parameter for file"}, + {NDBD_EXIT_AFS_UNKNOWN , XIE, "Unknown filesystem error"}, + {NDBD_EXIT_AFS_NO_MORE_RESOURCES , XIE, "No resources in filesystem"}, + {NDBD_EXIT_AFS_NO_SUCH_FILE , XFI, "File not found"}, + {NDBD_EXIT_AFS_READ_UNDERFLOW , XIE, "Read underflow"}, + + // Sentinel + {0, XUE, "No message slogan found"} +}; + +typedef struct StatusExitMessage { + ndbd_exit_status status; + const char * message; +} StatusExitMessage; + +typedef struct StatusExitClassification { + ndbd_exit_status status; + ndbd_exit_classification classification; + const char * message; +} StatusExitClassification; + +/** + * Mapping between classification and status + */ +static +const +StatusExitMessage StatusExitMessageMapping[] = { + { XST_S, "Success"}, + { XST_U ,"Unknown"}, + { XST_P, "Permanent error, external action needed"}, + { XST_R, "Temporary error, restart node"}, + { XST_I, "Temporary error, restart node initial"}, + { XST_B, "Programming error, please report a bug, try restarting node"} +}; + +static +const +int NbExitStatus = sizeof(StatusExitMessageMapping)/sizeof(StatusExitMessage); + +static +const +StatusExitClassification StatusExitClassificationMapping[] = { + { XST_S, XNE, "No error"}, + { XST_U, XUE, "Unknown"}, + { XST_R, XIE, "Internal error"}, + { XST_P, XCE, "Configuration error"}, + { XST_R, XAE, "Arbitration error"}, + { XST_R, XRE, "Restart error"}, + { XST_P, XCR, "Resource configuration error"}, + { XST_P, XFF, "File system full"}, + { XST_I, XFI, "File system inconsistency error"} +}; + +static const int NbExitClassification = +sizeof(StatusExitClassificationMapping)/sizeof(StatusExitClassification); + +const char *ndbd_exit_message(int faultId, ndbd_exit_classification *cl) +{ + int i = 0; + while (errArray[i].faultId != faultId && errArray[i].faultId != 0) + i++; + *cl = errArray[i].classification; + return errArray[i].text; +} + +static const char* empty_xstring = ""; + +const +char *ndbd_exit_classification_message(ndbd_exit_classification classification, + ndbd_exit_status *status) +{ + int i; + for (i= 0; i < NbExitClassification; i++) + { + if (StatusExitClassificationMapping[i].classification == classification) + { + *status = StatusExitClassificationMapping[i].status; + return StatusExitClassificationMapping[i].message; + } + } + *status = XST_U; + return empty_xstring; +} + +const char *ndbd_exit_status_message(ndbd_exit_status status) +{ + int i; + for (i= 0; i < NbExitStatus; i++) + if (StatusExitMessageMapping[i].status == status) + return StatusExitMessageMapping[i].message; + return empty_xstring; +} + +int ndbd_exit_string(int err_no, char *str, unsigned int size) +{ + unsigned int len; + + ndbd_exit_classification cl; + ndbd_exit_status st; + const char *msg = ndbd_exit_message(err_no, &cl); + if (msg[0] != '\0') + { + const char *cl_msg = ndbd_exit_classification_message(cl, &st); + const char *st_msg = ndbd_exit_status_message(st); + + len = my_snprintf(str, size-1, "%s: %s: %s", msg, st_msg, cl_msg); + str[size-1]= '\0'; + + return len; + } + return -1; +} diff --git a/ndb/src/kernel/main.cpp b/ndb/src/kernel/main.cpp index bec9c8b28f4..6b2a0789c90 100644 --- a/ndb/src/kernel/main.cpp +++ b/ndb/src/kernel/main.cpp @@ -37,6 +37,8 @@ #include <NdbAutoPtr.hpp> +#include <Properties.hpp> + #include <mgmapi_debug.h> #if defined NDB_SOLARIS // ok @@ -61,16 +63,181 @@ extern "C" void handler_sigusr1(int signum); // child signalling failed restart void systemInfo(const Configuration & conf, const LogLevel & ll); +static FILE *child_info_file_r= 0; +static FILE *child_info_file_w= 0; + +static void writeChildInfo(const char *token, int val) +{ + fprintf(child_info_file_w, "%s=%d\n", token, val); + fflush(child_info_file_w); +} + +void childReportSignal(int signum) +{ + writeChildInfo("signal", signum); +} + +void childReportError(int error) +{ + writeChildInfo("error", error); +} + +void childExit(int code, Uint32 currentStartPhase) +{ + writeChildInfo("sphase", currentStartPhase); + writeChildInfo("exit", code); + fprintf(child_info_file_w, "\n"); + fclose(child_info_file_r); + fclose(child_info_file_w); + exit(code); +} + +void childAbort(int code, Uint32 currentStartPhase) +{ + writeChildInfo("sphase", currentStartPhase); + writeChildInfo("exit", code); + fprintf(child_info_file_w, "\n"); + fclose(child_info_file_r); + fclose(child_info_file_w); + signal(6, SIG_DFL); + abort(); +} + +static int insert(const char * pair, Properties & p) +{ + BaseString tmp(pair); + + tmp.trim(" \t\n\r"); + Vector<BaseString> split; + tmp.split(split, ":=", 2); + if(split.size() != 2) + return -1; + p.put(split[0].trim().c_str(), split[1].trim().c_str()); + return 0; +} + +static int readChildInfo(Properties &info) +{ + fclose(child_info_file_w); + char buf[128]; + while (fgets(buf,sizeof(buf),child_info_file_r)) + insert(buf,info); + fclose(child_info_file_r); + return 0; +} + +static bool get_int_property(Properties &info, + const char *token, Uint32 *int_val) +{ + const char *str_val= 0; + if (!info.get(token, &str_val)) + return false; + char *endptr; + long int tmp= strtol(str_val, &endptr, 10); + if (str_val == endptr) + return false; + *int_val = tmp; + return true; +} + +int reportShutdown(class Configuration *config, int error_exit, int restart) +{ + Uint32 error= 0, signum= 0, sphase= 256; + Properties info; + readChildInfo(info); + + get_int_property(info, "signal", &signum); + get_int_property(info, "error", &error); + get_int_property(info, "sphase", &sphase); + + Uint32 length, theData[25]; + EventReport *rep = (EventReport *)theData; + + rep->setNodeId(globalData.ownId); + if (restart) + theData[1] = 1 | + (globalData.theRestartFlag == initial_state ? 2 : 0) | + (config->getInitialStart() ? 4 : 0); + else + theData[1] = 0; + + if (error_exit == 0) + { + rep->setEventType(NDB_LE_NDBStopCompleted); + theData[2] = signum; + length = 3; + } + else + { + rep->setEventType(NDB_LE_NDBStopForced); + theData[2] = signum; + theData[3] = error; + theData[4] = sphase; + theData[5] = 0; // extra + length = 6; + } + + { // Log event + const EventReport * const eventReport = (EventReport *)&theData[0]; + g_eventLogger.log(eventReport->getEventType(), theData, + eventReport->getNodeId(), 0); + } + + for (unsigned n = 0; n < config->m_mgmds.size(); n++) + { + NdbMgmHandle h = ndb_mgm_create_handle(); + if (h == 0 || + ndb_mgm_set_connectstring(h, config->m_mgmds[n].c_str()) || + ndb_mgm_connect(h, + 1, //no_retries + 0, //retry_delay_in_seconds + 0 //verbose + )) + goto handle_error; + + { + if (ndb_mgm_report_event(h, theData, length)) + goto handle_error; + } + goto do_next; + +handle_error: + if (h) + { + BaseString tmp(ndb_mgm_get_latest_error_msg(h)); + tmp.append(" : "); + tmp.append(ndb_mgm_get_latest_error_desc(h)); + g_eventLogger.warning("Unable to report shutdown reason to %s: %s", + config->m_mgmds[n].c_str(), tmp.c_str()); + } + else + { + g_eventLogger.error("Unable to report shutdown reason to %s", + config->m_mgmds[n].c_str()); + } +do_next: + if (h) + { + ndb_mgm_disconnect(h); + ndb_mgm_destroy_handle(&h); + } + } + return 0; +} + int main(int argc, char** argv) { NDB_INIT(argv[0]); // Print to stdout/console g_eventLogger.createConsoleHandler(); g_eventLogger.setCategory("ndbd"); + g_eventLogger.enable(Logger::LL_ON, Logger::LL_INFO); g_eventLogger.enable(Logger::LL_ON, Logger::LL_CRITICAL); g_eventLogger.enable(Logger::LL_ON, Logger::LL_ERROR); g_eventLogger.enable(Logger::LL_ON, Logger::LL_WARNING); + g_eventLogger.m_logLevel.setLogLevel(LogLevel::llStartUp, 15); + globalEmulatorData.create(); // Parse command line options @@ -103,10 +270,38 @@ int main(int argc, char** argv) #ifndef NDB_WIN32 signal(SIGUSR1, handler_sigusr1); - for(pid_t child = fork(); child != 0; child = fork()){ + pid_t child; + while (1) + { + // setup reporting between child and parent + int filedes[2]; + if (pipe(filedes)) + { + g_eventLogger.error("pipe() failed with errno=%d (%s)", + errno, strerror(errno)); + return 1; + } + else + { + if (!(child_info_file_w= fdopen(filedes[1],"w"))) + { + g_eventLogger.error("fdopen() failed with errno=%d (%s)", + errno, strerror(errno)); + } + if (!(child_info_file_r= fdopen(filedes[0],"r"))) + { + g_eventLogger.error("fdopen() failed with errno=%d (%s)", + errno, strerror(errno)); + } + } + + if ((child = fork()) <= 0) + break; // child or error + /** * Parent */ + catchsigs(true); /** @@ -115,12 +310,13 @@ int main(int argc, char** argv) */ theConfig->closeConfiguration(); - int status = 0; + int status = 0, error_exit = 0, signum = 0; while(waitpid(child, &status, 0) != child); if(WIFEXITED(status)){ switch(WEXITSTATUS(status)){ case NRT_Default: g_eventLogger.info("Angel shutting down"); + reportShutdown(theConfig, 0, 0); exit(0); break; case NRT_NoStart_Restart: @@ -136,10 +332,12 @@ int main(int argc, char** argv) globalData.theRestartFlag = perform_start; break; default: + error_exit = 1; if(theConfig->stopOnError()){ /** * Error shutdown && stopOnError() */ + reportShutdown(theConfig, error_exit, 0); exit(0); } // Fall-through @@ -148,12 +346,27 @@ int main(int argc, char** argv) globalData.theRestartFlag = perform_start; break; } - } else if(theConfig->stopOnError()){ - /** - * Error shutdown && stopOnError() - */ - exit(0); + } else { + error_exit = 1; + if (WIFSIGNALED(status)) + { + signum = WTERMSIG(status); + childReportSignal(signum); + } + else + { + signum = 127; + g_eventLogger.info("Unknown exit reason. Stopped."); + } + if(theConfig->stopOnError()){ + /** + * Error shutdown && stopOnError() + */ + reportShutdown(theConfig, error_exit, 0); + exit(0); + } } + if (!failed_startup_flag) { // Reset the counter for consecutive failed startups @@ -164,15 +377,21 @@ int main(int argc, char** argv) /** * Error shutdown && stopOnError() */ - g_eventLogger.alert("Ndbd has failed %u consecutive startups. Not restarting", failed_startups); + g_eventLogger.alert("Ndbd has failed %u consecutive startups. " + "Not restarting", failed_startups); + reportShutdown(theConfig, error_exit, 0); exit(0); } failed_startup_flag = false; + reportShutdown(theConfig, error_exit, 1); g_eventLogger.info("Ndb has terminated (pid %d) restarting", child); theConfig->fetch_configuration(); } - g_eventLogger.info("Angel pid: %d ndb pid: %d", getppid(), getpid()); + if (child >= 0) + g_eventLogger.info("Angel pid: %d ndb pid: %d", getppid(), getpid()); + else + g_eventLogger.info("Ndb pid: %d", getpid()); #else g_eventLogger.info("Ndb started"); #endif @@ -226,7 +445,7 @@ int main(int argc, char** argv) // Re-use the mgm handle as a transporter if(!globalTransporterRegistry.connect_client( theConfig->get_config_retriever()->get_mgmHandlePtr())) - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Connection to mgmd terminated before setup was complete", "StopOnError missing"); @@ -371,6 +590,8 @@ extern "C" void handler_shutdown(int signum){ g_eventLogger.info("Received signal %d. Performing stop.", signum); + childReportError(0); + childReportSignal(signum); globalData.theRestartFlag = perform_stop; } @@ -395,6 +616,7 @@ handler_error(int signum){ NdbSleep_MilliSleep(10); thread_id= my_thread_id(); g_eventLogger.info("Received signal %d. Running error handler.", signum); + writeChildInfo("signal", signum); // restart the system char errorData[40]; BaseString::snprintf(errorData, 40, "Signal %d received", signum); diff --git a/ndb/src/kernel/vm/ClusterConfiguration.cpp b/ndb/src/kernel/vm/ClusterConfiguration.cpp index d5bd03f69d5..813407b497e 100644 --- a/ndb/src/kernel/vm/ClusterConfiguration.cpp +++ b/ndb/src/kernel/vm/ClusterConfiguration.cpp @@ -359,12 +359,12 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ if(!db.get(tmp[i].attrib, tmp[i].storage)){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "%s not found", tmp[i].attrib); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } if(!p.get("NoOfNodes", &cd.SizeAltData.noOfNodes)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "NoOfNodes missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "NoOfNodes missing"); } Properties::Iterator it(&p); @@ -378,36 +378,36 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ const Properties * node; if(!p.get(name, &node)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data missing"); } if(!node->get("Id", &nodeId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Id) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Id) missing"); } if(!node->get("Type", &nodeType)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Type) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Type) missing"); } if(nodeId > MAX_NODES){ char buf[255]; snprintf(buf, sizeof(buf), "Maximum DB node id allowed is: %d", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(nodeId == 0){ char buf[255]; snprintf(buf, sizeof(buf), "Minimum node id allowed in the cluster is: 1"); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } for(unsigned j = 0; j<nodeNo; j++){ if(cd.nodeData[j].nodeId == nodeId){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "Two node can not have the same node id"); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } @@ -430,14 +430,14 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ if(nodeId > MAX_NDB_NODES){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "Maximum node id for a ndb node is: %d", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(cd.SizeAltData.noOfNDBNodes > MAX_NDB_NODES){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "Maximum %d ndb nodes is allowed in the cluster", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } else if(strcmp("API", nodeType) == 0){ cd.nodeData[nodeNo].nodeType = NodeInfo::API; @@ -452,7 +452,7 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ cd.SizeAltData.noOfMGMNodes++; // No of MGM processes tmpApiMgmProperties = "MGM"; } else { - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration: Unknown node type", nodeType); } @@ -462,7 +462,7 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ const Properties* q = 0; if (!p.get(tmpApiMgmProperties, nodeId, &q)) { - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, tmpApiMgmProperties); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, tmpApiMgmProperties); } else { */ Uint32 rank = 0; diff --git a/ndb/src/kernel/vm/Configuration.cpp b/ndb/src/kernel/vm/Configuration.cpp index 650d914035f..773c074f367 100644 --- a/ndb/src/kernel/vm/Configuration.cpp +++ b/ndb/src/kernel/vm/Configuration.cpp @@ -194,7 +194,7 @@ Configuration::fetch_configuration(){ if (m_config_retriever->hasError()) { - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Could not connect initialize handle to management server", m_config_retriever->getErrorString()); } @@ -206,7 +206,7 @@ Configuration::fetch_configuration(){ /* Set stop on error to true otherwise NDB will go into an restart loop... */ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Could not connect to ndb_mgmd", s); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Could not connect to ndb_mgmd", s); } m_mgmd_port= m_config_retriever->get_mgmd_port(); @@ -224,7 +224,7 @@ Configuration::fetch_configuration(){ globalData.ownId = cr.allocNodeId(2 /*retry*/,3 /*delay*/); if(globalData.ownId == 0){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Unable to alloc node id", m_config_retriever->getErrorString()); } @@ -238,7 +238,7 @@ Configuration::fetch_configuration(){ go into an restart loop... */ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Could not fetch configuration" + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Could not fetch configuration" "/invalid configuration", s); } if(m_clusterConfig) @@ -248,13 +248,36 @@ Configuration::fetch_configuration(){ ndb_mgm_configuration_iterator iter(* p, CFG_SECTION_NODE); if (iter.find(CFG_NODE_ID, globalData.ownId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); } if(iter.get(CFG_DB_STOP_ON_ERROR, &_stopOnError)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "StopOnError missing"); } + + m_mgmds.clear(); + for(ndb_mgm_first(&iter); ndb_mgm_valid(&iter); ndb_mgm_next(&iter)) + { + Uint32 nodeType, port; + char const *hostname; + + ndb_mgm_get_int_parameter(&iter,CFG_TYPE_OF_SECTION,&nodeType); + + if (nodeType != NodeInfo::MGM) + continue; + + if (ndb_mgm_get_string_parameter(&iter,CFG_NODE_HOST, &hostname) || + ndb_mgm_get_int_parameter(&iter,CFG_MGM_PORT, &port) || + hostname == 0 || hostname[0] == 0) + { + continue; + } + BaseString connectstring(hostname); + connectstring.appfmt(":%d", port); + + m_mgmds.push_back(connectstring); + } } static char * get_and_validate_path(ndb_mgm_configuration_iterator &iter, @@ -262,12 +285,12 @@ static char * get_and_validate_path(ndb_mgm_configuration_iterator &iter, { const char* path = NULL; if(iter.get(param, &path)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched missing ", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched missing ", param_string); } if(path == 0 || strlen(path) == 0){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched. Configuration does not contain valid ", param_string); } @@ -285,7 +308,7 @@ static char * get_and_validate_path(ndb_mgm_configuration_iterator &iter, (::access(buf2, W_OK) != 0)) #endif { - ERROR_SET(fatal, AFS_ERROR_INVALIDPATH, path, " Filename::init()"); + ERROR_SET(fatal, NDBD_EXIT_AFS_INVALIDPATH, path, " Filename::init()"); } if (strcmp(&buf2[strlen(buf2) - 1], DIR_SEPARATOR)) @@ -309,7 +332,7 @@ Configuration::setupConfiguration(){ * p, globalTransporterRegistry); if(res <= 0){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "No transporters configured"); } } @@ -319,27 +342,27 @@ Configuration::setupConfiguration(){ */ ndb_mgm_configuration_iterator iter(* p, CFG_SECTION_NODE); if (iter.find(CFG_NODE_ID, globalData.ownId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); } unsigned type; if(!(iter.get(CFG_TYPE_OF_SECTION, &type) == 0 && type == NODE_TYPE_DB)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "I'm wrong type of node"); } if(iter.get(CFG_DB_NO_SAVE_MSGS, &_maxErrorLogs)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "MaxNoOfSavedMessages missing"); } if(iter.get(CFG_DB_MEMLOCK, &_lockPagesInMainMemory)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "LockPagesInMainMemory missing"); } if(iter.get(CFG_DB_WATCHDOG_INTERVAL, &_timeBetweenWatchDogCheck)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "TimeBetweenWatchDogCheck missing"); } @@ -354,7 +377,7 @@ Configuration::setupConfiguration(){ _backupPath= get_and_validate_path(iter, CFG_DB_BACKUP_DATADIR, "BackupDataDir"); if(iter.get(CFG_DB_STOP_ON_ERROR_INSERT, &m_restartOnErrorInsert)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "RestartOnErrorInsert missing"); } @@ -496,7 +519,7 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ *tmp[i].storage = 0; } else { BaseString::snprintf(buf, sizeof(buf),"ConfigParam: %d not found", tmp[i].paramId); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } } @@ -506,12 +529,12 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ ndb_mgm_get_int64_parameter(&db, CFG_DB_INDEX_MEM, &indexMem); if(dataMem == 0){ BaseString::snprintf(buf, sizeof(buf), "ConfigParam: %d not found", CFG_DB_DATA_MEM); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(indexMem == 0){ BaseString::snprintf(buf, sizeof(buf), "ConfigParam: %d not found", CFG_DB_INDEX_MEM); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } noOfDataPages = (dataMem / 32768); @@ -535,23 +558,23 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ Uint32 nodeType; if(ndb_mgm_get_int_parameter(p, CFG_NODE_ID, &nodeId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Id) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Id) missing"); } if(ndb_mgm_get_int_parameter(p, CFG_TYPE_OF_SECTION, &nodeType)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Type) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Type) missing"); } if(nodeId > MAX_NODES || nodeId == 0){ BaseString::snprintf(buf, sizeof(buf), "Invalid node id: %d", nodeId); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(nodes.get(nodeId)){ BaseString::snprintf(buf, sizeof(buf), "Two node can not have the same node id: %d", nodeId); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } nodes.set(nodeId); @@ -562,7 +585,7 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ if(nodeId > MAX_NDB_NODES){ BaseString::snprintf(buf, sizeof(buf), "Maximum node id for a ndb node is: %d", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } break; case NODE_TYPE_API: @@ -577,7 +600,7 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ break; default: BaseString::snprintf(buf, sizeof(buf), "Unknown node type: %d", nodeType); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } noOfNodes = nodeNo; diff --git a/ndb/src/kernel/vm/Configuration.hpp b/ndb/src/kernel/vm/Configuration.hpp index 6ca6d9a1f17..5043d1f0bee 100644 --- a/ndb/src/kernel/vm/Configuration.hpp +++ b/ndb/src/kernel/vm/Configuration.hpp @@ -75,6 +75,8 @@ public: private: friend class Cmvmi; friend class Qmgr; + friend int reportShutdown(class Configuration *config, int error, int restart); + ndb_mgm_configuration_iterator * getClusterConfigIterator() const; Uint32 _stopOnError; @@ -91,6 +93,8 @@ private: ConfigRetriever *m_config_retriever; + Vector<BaseString> m_mgmds; + /** * arguments to NDB process */ diff --git a/ndb/src/kernel/vm/Emulator.cpp b/ndb/src/kernel/vm/Emulator.cpp index 058829e05e2..2105b7ddb5e 100644 --- a/ndb/src/kernel/vm/Emulator.cpp +++ b/ndb/src/kernel/vm/Emulator.cpp @@ -35,11 +35,16 @@ #include <EventLogger.hpp> +void childExit(int code, Uint32 currentStartPhase); +void childAbort(int code, Uint32 currentStartPhase); + extern "C" { extern void (* ndb_new_handler)(); } extern EventLogger g_eventLogger; extern my_bool opt_core; +// instantiated and updated in NdbcntrMain.cpp +extern Uint32 g_currentStartPhase; /** * Declare the global variables @@ -76,7 +81,7 @@ EmulatorData::EmulatorData(){ void ndb_new_handler_impl(){ - ERROR_SET(fatal, ERR_MEMALLOC, "New handler", ""); + ERROR_SET(fatal, NDBD_EXIT_MEMALLOC, "New handler", ""); } void @@ -111,8 +116,8 @@ EmulatorData::destroy(){ void NdbShutdown(NdbShutdownType type, - NdbRestartType restartType){ - + NdbRestartType restartType) +{ if(type == NST_ErrorInsert){ type = NST_Restart; restartType = (NdbRestartType) @@ -181,12 +186,11 @@ NdbShutdown(NdbShutdownType type, g_eventLogger.info("Watchdog shutdown completed - %s", exitAbort); if (opt_core) { - signal(6, SIG_DFL); - abort(); + childAbort(-1,g_currentStartPhase); } else { - exit(-1); + childExit(-1,g_currentStartPhase); } } @@ -241,12 +245,11 @@ NdbShutdown(NdbShutdownType type, g_eventLogger.info("Error handler shutdown completed - %s", exitAbort); if (opt_core) { - signal(6, SIG_DFL); - abort(); + childAbort(-1,g_currentStartPhase); } else { - exit(-1); + childExit(-1,g_currentStartPhase); } } @@ -254,7 +257,7 @@ NdbShutdown(NdbShutdownType type, * This is a normal restart, depend on angel */ if(type == NST_Restart){ - exit(restartType); + childExit(restartType,g_currentStartPhase); } g_eventLogger.info("Shutdown completed - exiting"); @@ -269,10 +272,9 @@ NdbShutdown(NdbShutdownType type, if (type== NST_Watchdog){ g_eventLogger.info("Watchdog is killing system the hard way"); #if defined VM_TRACE && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) ) - signal(6, SIG_DFL); - abort(); + childAbort(-1,g_currentStartPhase); #else - exit(-1); + childExit(-1,g_currentStartPhase); #endif } diff --git a/ndb/src/kernel/vm/FastScheduler.cpp b/ndb/src/kernel/vm/FastScheduler.cpp index a2d806571fe..5c68cbe6480 100644 --- a/ndb/src/kernel/vm/FastScheduler.cpp +++ b/ndb/src/kernel/vm/FastScheduler.cpp @@ -19,7 +19,6 @@ #include "Emulator.hpp" #include "VMSignal.hpp" -#include <Error.hpp> #include <SignalLoggerManager.hpp> #include <BlockNumbers.h> @@ -444,21 +443,21 @@ void FastScheduler::dumpSignalMemory(FILE * output) void FastScheduler::prio_level_error() { - ERROR_SET(ecError, ERROR_WRONG_PRIO_LEVEL, + ERROR_SET(ecError, NDBD_EXIT_WRONG_PRIO_LEVEL, "Wrong Priority Level", "FastScheduler.C"); } void jbuf_error() { - ERROR_SET(ecError, BLOCK_ERROR_JBUFCONGESTION, + ERROR_SET(ecError, NDBD_EXIT_BLOCK_JBUFCONGESTION, "Job Buffer Full", "APZJobBuffer.C"); } void bnr_error() { - ERROR_SET(ecError, BLOCK_ERROR_BNR_ZERO, + ERROR_SET(ecError, NDBD_EXIT_BLOCK_BNR_ZERO, "Block Number Zero", "FastScheduler.C"); } diff --git a/ndb/src/kernel/vm/SimulatedBlock.cpp b/ndb/src/kernel/vm/SimulatedBlock.cpp index 57a4032e40b..9e1cff7a0b2 100644 --- a/ndb/src/kernel/vm/SimulatedBlock.cpp +++ b/ndb/src/kernel/vm/SimulatedBlock.cpp @@ -30,6 +30,7 @@ #include <signaldata/ContinueFragmented.hpp> #include <signaldata/NodeStateSignalData.hpp> #include <signaldata/FsRef.hpp> +#include <signaldata/SignalDroppedRep.hpp> #include <DebuggerNames.hpp> #include "LongSignal.hpp" @@ -156,8 +157,8 @@ SimulatedBlock::addRecSignalImpl(GlobalSignalNumber gsn, if(gsn > MAX_GSN || (!force && theExecArray[gsn] != 0)){ char errorMsg[255]; BaseString::snprintf(errorMsg, 255, - "Illeagal signal (%d %d)", gsn, MAX_GSN); - ERROR_SET(fatal, ERR_ERROR_PRGERR, errorMsg, errorMsg); + "GSN %d(%d))", gsn, MAX_GSN); + ERROR_SET(fatal, NDBD_EXIT_ILLEGAL_SIGNAL, errorMsg, errorMsg); } theExecArray[gsn] = f; } @@ -173,8 +174,7 @@ SimulatedBlock::signal_error(Uint32 gsn, Uint32 len, Uint32 recBlockNo, "Signal (GSN: %d, Length: %d, Rec Block No: %d)", gsn, len, recBlockNo); - ErrorReporter::handleError(ecError, - BLOCK_ERROR_BNR_ZERO, + ErrorReporter::handleError(NDBD_EXIT_BLOCK_BNR_ZERO, probData, objRef); } @@ -676,7 +676,7 @@ SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear) getBlockName(number()), type); BaseString::snprintf(buf2, sizeof(buf2), "Requested: %ux%u = %u bytes", (Uint32)s, (Uint32)n, (Uint32)size); - ERROR_SET(fatal, ERR_MEMALLOC, buf1, buf2); + ERROR_SET(fatal, NDBD_EXIT_MEMALLOC, buf1, buf2); } if(clear){ @@ -733,7 +733,7 @@ SimulatedBlock::progError(int line, int err_code, const char* extra) const { BaseString::snprintf(&buf[0], 100, "%s (Line: %d) 0x%.8x", aBlockName, line, magicStatus); - ErrorReporter::handleError(ecError, err_code, extra, buf); + ErrorReporter::handleError(err_code, extra, buf); } @@ -854,9 +854,12 @@ SimulatedBlock::execNDB_TAMPER(Signal * signal){ void SimulatedBlock::execSIGNAL_DROPPED_REP(Signal * signal){ - ErrorReporter::handleError(ecError, - ERR_OUT_OF_LONG_SIGNAL_MEMORY, - "Signal lost, out of long signal memory", + char msg[64]; + const SignalDroppedRep * const rep = (SignalDroppedRep *)&signal->theData[0]; + snprintf(msg, sizeof(msg), "%s GSN: %u (%u,%u)", getBlockName(number()), + rep->originalGsn, rep->originalLength,rep->originalSectionCount); + ErrorReporter::handleError(NDBD_EXIT_OUT_OF_LONG_SIGNAL_MEMORY, + msg, __FILE__, NST_ErrorHandler); } diff --git a/ndb/src/kernel/vm/SimulatedBlock.hpp b/ndb/src/kernel/vm/SimulatedBlock.hpp index bba92ca7c31..c1b540ff50c 100644 --- a/ndb/src/kernel/vm/SimulatedBlock.hpp +++ b/ndb/src/kernel/vm/SimulatedBlock.hpp @@ -34,7 +34,6 @@ #include "LongSignal.hpp" #include <SignalLoggerManager.hpp> -#include <Error.hpp> #include <ErrorReporter.hpp> #include <ErrorHandlingMacros.hpp> @@ -564,11 +563,11 @@ SimulatedBlock::executeFunction(GlobalSignalNumber gsn, Signal* signal){ char errorMsg[255]; if (!(gsn <= MAX_GSN)) { BaseString::snprintf(errorMsg, 255, "Illegal signal received (GSN %d too high)", gsn); - ERROR_SET(fatal, ERR_ERROR_PRGERR, errorMsg, errorMsg); + ERROR_SET(fatal, NDBD_EXIT_PRGERR, errorMsg, errorMsg); } if (!(theExecArray[gsn] != 0)) { BaseString::snprintf(errorMsg, 255, "Illegal signal received (GSN %d not added)", gsn); - ERROR_SET(fatal, ERR_ERROR_PRGERR, errorMsg, errorMsg); + ERROR_SET(fatal, NDBD_EXIT_PRGERR, errorMsg, errorMsg); } ndbrequire(false); } diff --git a/ndb/src/kernel/vm/TimeQueue.cpp b/ndb/src/kernel/vm/TimeQueue.cpp index 56988c2e3da..0b620c75d52 100644 --- a/ndb/src/kernel/vm/TimeQueue.cpp +++ b/ndb/src/kernel/vm/TimeQueue.cpp @@ -19,7 +19,6 @@ #include <GlobalData.hpp> #include <FastScheduler.hpp> #include <VMSignal.hpp> -#include <Error.hpp> static const int MAX_TIME_QUEUE_VALUE = 32000; @@ -70,7 +69,7 @@ TimeQueue::insert(Signal* signal, BlockNumber bnr, if (regShortIndex == 0){ theShortQueue[0].copy_struct = newEntry.copy_struct; } else if (regShortIndex >= MAX_NO_OF_SHORT_TQ - 1) { - ERROR_SET(ecError, ERROR_TIME_QUEUE_SHORT, + ERROR_SET(ecError, NDBD_EXIT_TIME_QUEUE_SHORT, "Too many in Short Time Queue", "TimeQueue.C" ); } else { for (i = 0; i < regShortIndex; i++) { @@ -99,7 +98,7 @@ TimeQueue::insert(Signal* signal, BlockNumber bnr, if (regLongIndex == 0) { theLongQueue[0].copy_struct = newEntry.copy_struct; } else if (regLongIndex >= MAX_NO_OF_LONG_TQ - 1) { - ERROR_SET(ecError, ERROR_TIME_QUEUE_LONG, + ERROR_SET(ecError, NDBD_EXIT_TIME_QUEUE_LONG, "Too many in Long Time Queue", "TimeQueue.C" ); } else { for (i = 0; i < regLongIndex; i++) { @@ -124,7 +123,7 @@ TimeQueue::insert(Signal* signal, BlockNumber bnr, } globalData.theLongTQIndex = regLongIndex + 1; } else { - ERROR_SET(ecError, ERROR_TIME_QUEUE_DELAY, + ERROR_SET(ecError, NDBD_EXIT_TIME_QUEUE_DELAY, "Too long delay for Time Queue", "TimeQueue.C" ); } } @@ -194,7 +193,7 @@ TimeQueue::getIndex() Uint32 retValue = globalData.theFirstFreeTQIndex; globalData.theFirstFreeTQIndex = (Uint32)theFreeIndex[retValue]; if (retValue >= MAX_NO_OF_TQ) - ERROR_SET(fatal, ERROR_TIME_QUEUE_INDEX, + ERROR_SET(fatal, NDBD_EXIT_TIME_QUEUE_INDEX, "Index out of range", "TimeQueue.C" ); return retValue; } diff --git a/ndb/src/kernel/vm/TransporterCallback.cpp b/ndb/src/kernel/vm/TransporterCallback.cpp index e5322edaecc..0bdfcf16689 100644 --- a/ndb/src/kernel/vm/TransporterCallback.cpp +++ b/ndb/src/kernel/vm/TransporterCallback.cpp @@ -314,18 +314,14 @@ reportError(void * callbackObj, NodeId nodeId, TransporterError errorCode){ #endif if(errorCode == TE_SIGNAL_LOST_SEND_BUFFER_FULL){ - ErrorReporter::handleError(ecError, - ERR_PROGRAMERROR, - "Signal lost, send buffer full", - __FILE__, + ErrorReporter::handleError(NDBD_EXIT_SIGNAL_LOST_SEND_BUFFER_FULL, + "", __FILE__, NST_ErrorHandler); } if(errorCode == TE_SIGNAL_LOST){ - ErrorReporter::handleError(ecError, - ERR_PROGRAMERROR, - "Signal lost (unknown reason)", - __FILE__, + ErrorReporter::handleError(NDBD_EXIT_SIGNAL_LOST, + "", __FILE__, NST_ErrorHandler); } diff --git a/ndb/src/kernel/vm/WatchDog.cpp b/ndb/src/kernel/vm/WatchDog.cpp index 23475a478d3..c80317e1725 100644 --- a/ndb/src/kernel/vm/WatchDog.cpp +++ b/ndb/src/kernel/vm/WatchDog.cpp @@ -95,39 +95,40 @@ WatchDog::run(){ globalData.incrementWatchDogCounter(0); alerts = 0; } else { + const char *last_stuck_action; alerts++; - ndbout << "Ndb kernel is stuck in: "; switch (oldIPValue) { case 1: - ndbout << "Job Handling" << endl; + last_stuck_action = "Job Handling"; break; case 2: - ndbout << "Scanning Timers" << endl; + last_stuck_action = "Scanning Timers"; break; case 3: - ndbout << "External I/O" << endl; + last_stuck_action = "External I/O"; break; case 4: - ndbout << "Print Job Buffers at crash" << endl; + last_stuck_action = "Print Job Buffers at crash"; break; case 5: - ndbout << "Checking connections" << endl; + last_stuck_action = "Checking connections"; break; case 6: - ndbout << "Performing Send" << endl; + last_stuck_action = "Performing Send"; break; case 7: - ndbout << "Polling for Receive" << endl; + last_stuck_action = "Polling for Receive"; break; case 8: - ndbout << "Performing Receive" << endl; + last_stuck_action = "Performing Receive"; break; default: - ndbout << "Unknown place" << endl; + last_stuck_action = "Unknown place"; break; }//switch + ndbout << "Ndb kernel is stuck in: " << last_stuck_action << endl; if(alerts == 3){ - shutdownSystem(); + shutdownSystem(last_stuck_action); } } } @@ -135,11 +136,10 @@ WatchDog::run(){ } void -WatchDog::shutdownSystem(){ +WatchDog::shutdownSystem(const char *last_stuck_action){ - ErrorReporter::handleError(ecError, - ERR_PROGRAMERROR, - "WatchDog terminate", + ErrorReporter::handleError(NDBD_EXIT_WATCHDOG_TERMINATE, + last_stuck_action, __FILE__, NST_Watchdog); } diff --git a/ndb/src/kernel/vm/WatchDog.hpp b/ndb/src/kernel/vm/WatchDog.hpp index 4b44b1a96a2..65b23dafdb1 100644 --- a/ndb/src/kernel/vm/WatchDog.hpp +++ b/ndb/src/kernel/vm/WatchDog.hpp @@ -50,7 +50,7 @@ private: bool theStop; void run(); - void shutdownSystem(); + void shutdownSystem(const char *last_stuck_action); }; #endif // WatchDog_H diff --git a/ndb/src/kernel/vm/pc.hpp b/ndb/src/kernel/vm/pc.hpp index 2d745d26b1c..6aeda59224f 100644 --- a/ndb/src/kernel/vm/pc.hpp +++ b/ndb/src/kernel/vm/pc.hpp @@ -90,7 +90,7 @@ * @param limit max no of records in rec * @param rec pointer to first record in an array of records */ -#define ptrCheckGuard(ptr, limit, rec) {\ +#define ptrCheckGuardErr(ptr, limit, rec, error) {\ UintR TxxzLimit; \ TxxzLimit = (limit); \ UintR TxxxPtr; \ @@ -99,24 +99,28 @@ if (TxxxPtr < (TxxzLimit)) { \ ; \ } else { \ - progError(__LINE__, ERR_POINTER_NOTINRANGE, __FILE__); \ + progError(__LINE__, error, __FILE__); \ }} - #define ptrAss(ptr, rec) ptr.p = &rec[ptr.i] #define ptrNull(ptr) ptr.p = NULL -#define ptrGuard(ptr) if (ptr.p == NULL) \ - progError(__LINE__, ERR_POINTER_NOTINRANGE, __FILE__) -#define arrGuard(ind, size) if ((ind) >= (size)) \ - progError(__LINE__, ERR_INDEX_NOTINRANGE, __FILE__) +#define ptrGuardErr(ptr, error) if (ptr.p == NULL) \ + progError(__LINE__, error, __FILE__) +#define arrGuardErr(ind, size, error) if ((ind) >= (size)) \ + progError(__LINE__, error, __FILE__) #else #define ptrCheck(ptr, limit, rec) ptr.p = &rec[ptr.i] -#define ptrCheckGuard(ptr, limit, rec) ptr.p = &rec[ptr.i] +#define ptrCheckGuardErr(ptr, limit, rec, error) ptr.p = &rec[ptr.i] #define ptrAss(ptr, rec) ptr.p = &rec[ptr.i] #define ptrNull(ptr) ptr.p = NULL -#define ptrGuard(ptr) -#define arrGuard(ind, size) +#define ptrGuardErr(ptr, error) +#define arrGuardErr(ind, size, error) #endif +#define ptrCheckGuard(ptr, limit, rec) \ + ptrCheckGuardErr(ptr, limit, rec, NDBD_EXIT_POINTER_NOTINRANGE) +#define ptrGuard(ptr) ptrGuardErr(ptr, NDBD_EXIT_POINTER_NOTINRANGE) +#define arrGuard(ind, size) arrGuardErr(ind, size, NDBD_EXIT_INDEX_NOTINRANGE) + // -------- ERROR INSERT MACROS ------- #ifdef ERROR_INSERT #define ERROR_INSERT_VARIABLE UintR cerrorInsert @@ -197,34 +201,31 @@ #define ndbassert(check) \ if((check)){ \ } else { \ - progError(__LINE__, ERR_NDBREQUIRE, __FILE__); \ - } - -#define ndbrequire(check) \ - if((check)){ \ - } else { \ - progError(__LINE__, ERR_NDBREQUIRE, __FILE__); \ - } + progError(__LINE__, NDBD_EXIT_NDBASSERT, __FILE__); \ + } #else #define ndbassert(check) +#endif -#define ndbrequire(check) \ +#define ndbrequireErr(check, error) \ if((check)){ \ } else { \ - progError(__LINE__, ERR_NDBREQUIRE, __FILE__); \ - } -#endif + progError(__LINE__, error, __FILE__); \ + } + +#define ndbrequire(check) \ + ndbrequireErr(check, NDBD_EXIT_NDBREQUIRE) #define CRASH_INSERTION(errorType) \ if (!ERROR_INSERTED((errorType))) { \ } else { \ - progError(__LINE__, ERR_ERROR_INSERT, __FILE__); \ + progError(__LINE__, NDBD_EXIT_ERROR_INSERT, __FILE__); \ } #define CRASH_INSERTION2(errorNum, condition) \ if (!(ERROR_INSERTED(errorNum) && condition)) { \ } else { \ - progError(__LINE__, ERR_ERROR_INSERT, __FILE__); \ + progError(__LINE__, NDBD_EXIT_ERROR_INSERT, __FILE__); \ } #define MEMCOPY_PAGE(to, from, page_size_in_bytes) \ diff --git a/ndb/src/mgmapi/Makefile.am b/ndb/src/mgmapi/Makefile.am index db730bf8c89..efe1b8ea2d5 100644 --- a/ndb/src/mgmapi/Makefile.am +++ b/ndb/src/mgmapi/Makefile.am @@ -1,7 +1,7 @@ noinst_LTLIBRARIES = libmgmapi.la -libmgmapi_la_SOURCES = mgmapi.cpp ndb_logevent.cpp mgmapi_configuration.cpp LocalConfig.cpp +libmgmapi_la_SOURCES = mgmapi.cpp ndb_logevent.cpp mgmapi_configuration.cpp LocalConfig.cpp ../kernel/error/ndbd_exit_codes.c INCLUDES_LOC = -I$(top_srcdir)/ndb/include/mgmapi diff --git a/ndb/src/mgmapi/mgmapi.cpp b/ndb/src/mgmapi/mgmapi.cpp index 8263e8cbc93..d5a821c2287 100644 --- a/ndb/src/mgmapi/mgmapi.cpp +++ b/ndb/src/mgmapi/mgmapi.cpp @@ -2282,4 +2282,33 @@ ndb_mgm_get_mgmd_nodeid(NdbMgmHandle handle) DBUG_RETURN(nodeid); } +extern "C" +int ndb_mgm_report_event(NdbMgmHandle handle, Uint32 *data, Uint32 length) +{ + DBUG_ENTER("ndb_mgm_report_event"); + CHECK_HANDLE(handle, 0); + CHECK_CONNECTED(handle, 0); + + Properties args; + args.put("length", length); + BaseString data_string; + + for (int i = 0; i < length; i++) + data_string.appfmt(" %u", data[i]); + + args.put("data", data_string.c_str()); + + const ParserRow<ParserDummy> reply[]= { + MGM_CMD("report event reply", NULL, ""), + MGM_ARG("result", String, Mandatory, "Result"), + MGM_END() + }; + + const Properties *prop; + prop = ndb_mgm_call(handle, reply, "report event", &args); + CHECK_REPLY(prop, -1); + + DBUG_RETURN(0); +} + template class Vector<const ParserRow<ParserDummy>*>; diff --git a/ndb/src/mgmapi/ndb_logevent.cpp b/ndb/src/mgmapi/ndb_logevent.cpp index 918ec5d6705..a90d5658506 100644 --- a/ndb/src/mgmapi/ndb_logevent.cpp +++ b/ndb/src/mgmapi/ndb_logevent.cpp @@ -152,6 +152,15 @@ struct Ndb_logevent_body_row ndb_logevent_body[]= { ROW( NDBStopStarted, "stoptype", 1, stoptype), + ROW( NDBStopCompleted, "action", 1, action), + ROW( NDBStopCompleted, "signum", 2, signum), + + ROW( NDBStopForced, "action", 1, action), + ROW( NDBStopForced, "signum", 2, signum), + ROW( NDBStopForced, "error", 3, error), + ROW( NDBStopForced, "sphase", 4, sphase), + ROW( NDBStopForced, "extra", 5, extra), + // ROW( NDBStopAborted), ROW( StartREDOLog, "node", 1, node), diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp index b5d1f38ba53..0b1b2e3a087 100644 --- a/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/ndb/src/mgmclient/CommandInterpreter.cpp @@ -459,7 +459,9 @@ event_thread_run(void* m) NdbMgmHandle handle= *(NdbMgmHandle*)m; - int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0 }; + int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, + 1, NDB_MGM_EVENT_CATEGORY_STARTUP, + 0 }; int fd = ndb_mgm_listen_event(handle, filter); if (fd != NDB_INVALID_SOCKET) { diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index 5a07c5fa1ec..ab0064af7c2 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -1594,8 +1594,13 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal) case GSN_EVENT_SUBSCRIBE_REF: break; case GSN_EVENT_REP: - eventReport(refToNode(signal->theSendersBlockRef), signal->getDataPtr()); + { + EventReport *rep = CAST_PTR(EventReport, signal->getDataPtrSend()); + if (rep->getNodeId() == 0) + rep->setNodeId(refToNode(signal->theSendersBlockRef)); + eventReport(signal->getDataPtr()); break; + } case GSN_NF_COMPLETEREP: break; @@ -1620,19 +1625,22 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete) { DBUG_ENTER("MgmtSrvr::handleStatus"); Uint32 theData[25]; + EventReport *rep = (EventReport *)theData; + theData[1] = nodeId; if (alive) { m_started_nodes.push_back(nodeId); - theData[0] = NDB_LE_Connected; + rep->setEventType(NDB_LE_Connected); } else { - theData[0] = NDB_LE_Disconnected; + rep->setEventType(NDB_LE_Connected); if(nfComplete) { DBUG_VOID_RETURN; } } - eventReport(_ownNodeId, theData); + rep->setNodeId(_ownNodeId); + eventReport(theData); DBUG_VOID_RETURN; } @@ -1964,10 +1972,11 @@ MgmtSrvr::getNextNodeId(NodeId * nodeId, enum ndb_mgm_node_type type) const #include "Services.hpp" void -MgmtSrvr::eventReport(NodeId nodeId, const Uint32 * theData) +MgmtSrvr::eventReport(const Uint32 * theData) { const EventReport * const eventReport = (EventReport *)&theData[0]; + NodeId nodeId = eventReport->getNodeId(); Ndb_logevent_type type = eventReport->getEventType(); // Log event g_eventLogger.log(type, theData, nodeId, diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index 3b14fa60e6b..9dff185a46d 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -605,7 +605,7 @@ private: /** * An event from <i>nodeId</i> has arrived */ - void eventReport(NodeId nodeId, const Uint32 * theData); + void eventReport(const Uint32 * theData); //************************************************************************** diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index 8c087c2a3ca..d26fbfff188 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -182,12 +182,6 @@ ParserRow<MgmApiSession> commands[] = { MGM_CMD("abort backup", &MgmApiSession::abortBackup, ""), MGM_ARG("id", Int, Mandatory, "Backup id"), - /** - * Global Replication - */ - MGM_CMD("rep", &MgmApiSession::repCommand, ""), - MGM_ARG("request", Int, Mandatory, "Command"), - MGM_CMD("stop", &MgmApiSession::stop, ""), MGM_ARG("node", String, Mandatory, "Node"), MGM_ARG("abort", Int, Mandatory, "Node"), @@ -253,6 +247,10 @@ ParserRow<MgmApiSession> commands[] = { MGM_CMD("get mgmd nodeid", &MgmApiSession::get_mgmd_nodeid, ""), + MGM_CMD("report event", &MgmApiSession::report_event, ""), + MGM_ARG("length", Int, Mandatory, "Length"), + MGM_ARG("data", String, Mandatory, "Data"), + MGM_END() }; @@ -696,30 +694,6 @@ MgmApiSession::abortBackup(Parser<MgmApiSession>::Context &, m_output->println(""); } -/***************************************************************************** - * Global Replication - *****************************************************************************/ - -void -MgmApiSession::repCommand(Parser<MgmApiSession>::Context &, - Properties const &args) { - - Uint32 request = 0; - args.get("request", &request); - - Uint32 repReqId; - int result = m_mgmsrv.repCommand(&repReqId, request, true); - - m_output->println("global replication reply"); - if(result != 0) - m_output->println("result: %s", get_error_text(result)); - else{ - m_output->println("result: Ok"); - m_output->println("id: %d", repReqId); - } - m_output->println(""); -} - /*****************************************************************************/ void @@ -1573,5 +1547,31 @@ MgmApiSession::get_mgmd_nodeid(Parser_t::Context &ctx, m_output->println(""); } +void +MgmApiSession::report_event(Parser_t::Context &ctx, + Properties const &args) +{ + Uint32 length; + const char *data_string; + Uint32 data[25]; + + args.get("length", &length); + args.get("data", &data_string); + + BaseString tmp(data_string); + Vector<BaseString> item; + tmp.split(item, " "); + for (int i = 0; i < length ; i++) + { + sscanf(item[i].c_str(), "%u", data+i); + } + + m_mgmsrv.eventReport(data); + m_output->println("report event reply"); + m_output->println("result: ok"); + m_output->println(""); +} + template class MutexVector<int>; template class Vector<ParserRow<MgmApiSession> const*>; +template class Vector<BaseString>; diff --git a/ndb/src/mgmsrv/Services.hpp b/ndb/src/mgmsrv/Services.hpp index 431126a1f35..30f220cd060 100644 --- a/ndb/src/mgmsrv/Services.hpp +++ b/ndb/src/mgmsrv/Services.hpp @@ -98,8 +98,8 @@ public: void transporter_connect(Parser_t::Context &ctx, Properties const &args); void get_mgmd_nodeid(Parser_t::Context &ctx, Properties const &args); - - void repCommand(Parser_t::Context &ctx, const class Properties &args); + + void report_event(Parser_t::Context &ctx, Properties const &args); }; class MgmApiService : public SocketServer::Service { diff --git a/ndb/src/ndbapi/NdbRecAttr.cpp b/ndb/src/ndbapi/NdbRecAttr.cpp index 5201c6c9c04..2245707bf65 100644 --- a/ndb/src/ndbapi/NdbRecAttr.cpp +++ b/ndb/src/ndbapi/NdbRecAttr.cpp @@ -233,6 +233,13 @@ NdbOut& operator<<(NdbOut& out, const NdbRecAttr &r) j = length; } break; + case NdbDictionary::Column::Varbinary: + { + unsigned len = *(const unsigned char*)r.aRef(); + ndbrecattr_print_string(out,"Varbinary", r.aRef()+1,len); + j = length; + } + break; case NdbDictionary::Column::Float: out << r.float_value(); break; diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index 3c3893c38ae..4a9ac9affb7 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -691,5 +691,7 @@ int ndb_error_string(int err_no, char *str, unsigned int size) ndberror_classification_message(error.classification)); str[size-1]= '\0'; - return len; + if (error.classification != UE) + return len; + return -len; } |