Diffstat (limited to 'storage/ndb/src')
47 files changed, 1355 insertions, 517 deletions
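Editor's note: most of this changeset wires O_DIRECT support through the backup and redo-log paths — OM_DIRECT is added to the FsOpenReq flags in Backup and Dblqh when the new O_DIRECT config parameter is set, AsyncFile's write buffer and Dblqh's logPageRecord are aligned to NDB_O_DIRECT_WRITE_ALIGNMENT, and the final backup fragment write is padded out to a whole Page32. Below is a minimal sketch of the pointer-alignment arithmetic those hunks rely on; NDB_O_DIRECT_WRITE_ALIGNMENT is a real constant from the patch, but its 512-byte value and the helper function name here are assumptions for illustration only.

    #include <cstdint>
    #include <cstdlib>

    // Sketch only: the alignment value is assumed (512 bytes); the real
    // NDB_O_DIRECT_WRITE_ALIGNMENT constant is defined elsewhere in the tree.
    static const std::uintptr_t NDB_O_DIRECT_WRITE_ALIGNMENT = 512;

    // Over-allocate, then round the pointer up to the next aligned address --
    // the same pattern the patch uses for theWriteBufferUnaligned/theWriteBuffer
    // in AsyncFile and logPageRecordUnaligned/logPageRecord in Dblqh.
    static void* alloc_odirect_buffer(std::size_t size, void*& unaligned)
    {
      unaligned = std::malloc(size + NDB_O_DIRECT_WRITE_ALIGNMENT - 1);
      if (unaligned == 0)
        return 0;
      std::uintptr_t p = reinterpret_cast<std::uintptr_t>(unaligned);
      p = (p + NDB_O_DIRECT_WRITE_ALIGNMENT - 1) &
          ~(NDB_O_DIRECT_WRITE_ALIGNMENT - 1);
      return reinterpret_cast<void*>(p);  // caller later frees `unaligned`, not this pointer
    }

The same round-up pattern appears in Backup::OperationRecord::fragComplete below, where the fragment footer write is padded with an EMPTY_ENTRY record up to a Page32 boundary so that the last O_DIRECT write of a data file always covers full pages.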
diff --git a/storage/ndb/src/common/portlib/NdbTick.c b/storage/ndb/src/common/portlib/NdbTick.c index 238e9b1956d..7e54984794f 100644 --- a/storage/ndb/src/common/portlib/NdbTick.c +++ b/storage/ndb/src/common/portlib/NdbTick.c @@ -15,7 +15,7 @@ #include <ndb_global.h> -#include "NdbTick.h" +#include <NdbTick.h> #define NANOSEC_PER_SEC 1000000000 #define MICROSEC_PER_SEC 1000000 @@ -71,7 +71,6 @@ NdbTick_CurrentMicrosecond(NDB_TICKS * secs, Uint32 * micros){ } #endif -#ifdef TIME_MEASUREMENT int NdbTick_getMicroTimer(struct MicroSecondTimer* input_timer) { @@ -102,4 +101,3 @@ NdbTick_getMicrosPassed(struct MicroSecondTimer start, } return ret_value; } -#endif diff --git a/storage/ndb/src/common/transporter/TransporterRegistry.cpp b/storage/ndb/src/common/transporter/TransporterRegistry.cpp index 3e7589a54fe..5f5f3c17b2d 100644 --- a/storage/ndb/src/common/transporter/TransporterRegistry.cpp +++ b/storage/ndb/src/common/transporter/TransporterRegistry.cpp @@ -818,6 +818,7 @@ TransporterRegistry::performReceive() { Uint32 * ptr; Uint32 sz = t->getReceiveData(&ptr); + transporter_recv_from(callbackObj, nodeId); Uint32 szUsed = unpack(ptr, sz, nodeId, ioStates[nodeId]); t->updateReceiveDataPtr(szUsed); } diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt index b3405679978..67eb89f850f 100644 --- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt @@ -1,5 +1,5 @@ Next QMGR 1 -Next NDBCNTR 1001 +Next NDBCNTR 1002 Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4029 @@ -523,3 +523,4 @@ Dbtup: NDBCNTR: 1000: Crash insertion on SystemError::CopyFragRef +1001: Delay sending NODE_FAILREP (to own node), until error is cleared diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.cpp b/storage/ndb/src/kernel/blocks/backup/Backup.cpp index d86c22024cd..64e2c41aa69 100644 --- a/storage/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/storage/ndb/src/kernel/blocks/backup/Backup.cpp @@ -448,6 +448,41 @@ Backup::execDUMP_STATE_ORD(Signal* signal) filePtr.p->m_flags); } } + + ndbout_c("m_curr_disk_write_speed: %u m_words_written_this_period: %u m_overflow_disk_write: %u", + m_curr_disk_write_speed, m_words_written_this_period, m_overflow_disk_write); + ndbout_c("m_reset_delay_used: %u m_reset_disk_speed_time: %llu", + m_reset_delay_used, (Uint64)m_reset_disk_speed_time); + for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)) + { + ndbout_c("BackupRecord %u: BackupId: %u MasterRef: %x ClientRef: %x", + ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef); + ndbout_c(" State: %u", ptr.p->slaveState.getState()); + ndbout_c(" noOfByte: %llu noOfRecords: %llu", + ptr.p->noOfBytes, ptr.p->noOfRecords); + ndbout_c(" noOfLogBytes: %llu noOfLogRecords: %llu", + ptr.p->noOfLogBytes, ptr.p->noOfLogRecords); + ndbout_c(" errorCode: %u", ptr.p->errorCode); + BackupFilePtr filePtr; + for(ptr.p->files.first(filePtr); filePtr.i != RNIL; + ptr.p->files.next(filePtr)) + { + ndbout_c(" file %u: type: %u flags: H'%x tableId: %u fragmentId: %u", + filePtr.i, filePtr.p->fileType, filePtr.p->m_flags, + filePtr.p->tableId, filePtr.p->fragmentNo); + } + if (ptr.p->slaveState.getState() == SCANNING && ptr.p->dataFilePtr != RNIL) + { + c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); + OperationRecord & op = filePtr.p->operation; + Uint32 *tmp = NULL; + Uint32 sz = 0; + bool eof = FALSE; + bool ready = op.dataBuffer.getReadPtr(&tmp, &sz, &eof); + ndbout_c("ready: %s eof: %s", ready ? "TRUE" : "FALSE", eof ? 
"TRUE" : "FALSE"); + } + } + return; } if(signal->theData[0] == 24){ /** @@ -2771,6 +2806,8 @@ Backup::openFiles(Signal* signal, BackupRecordPtr ptr) c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); filePtr.p->m_flags |= BackupFile::BF_OPENING; + if (c_defaults.m_o_direct) + req->fileFlags |= FsOpenReq::OM_DIRECT; req->userPointer = filePtr.i; FsOpenReq::setVersion(req->fileNumber, 2); FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA); @@ -3745,12 +3782,31 @@ Backup::OperationRecord::newFragment(Uint32 tableId, Uint32 fragNo) } bool -Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo) +Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record) { Uint32 * tmp; const Uint32 footSz = sizeof(BackupFormat::DataFile::FragmentFooter) >> 2; + Uint32 sz = footSz + 1; - if(dataBuffer.getWritePtr(&tmp, footSz + 1)) { + if (fill_record) + { + Uint32 * new_tmp; + if (!dataBuffer.getWritePtr(&tmp, sz)) + return false; + new_tmp = tmp + sz; + + if ((UintPtr)new_tmp & (sizeof(Page32)-1)) + { + /* padding is needed to get full write */ + new_tmp += 2 /* to fit empty header minimum 2 words*/; + new_tmp = (Uint32 *)(((UintPtr)new_tmp + sizeof(Page32)-1) & + ~(UintPtr)(sizeof(Page32)-1)); + /* new write sz */ + sz = new_tmp - tmp; + } + } + + if(dataBuffer.getWritePtr(&tmp, sz)) { jam(); * tmp = 0; // Finish record stream tmp++; @@ -3762,7 +3818,17 @@ Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo) foot->FragmentNo = htonl(fragNo); foot->NoOfRecords = htonl(noOfRecords); foot->Checksum = htonl(0); - dataBuffer.updateWritePtr(footSz + 1); + + if (sz != footSz + 1) + { + tmp += footSz; + memset(tmp, 0, (sz - footSz - 1) * 4); + *tmp = htonl(BackupFormat::EMPTY_ENTRY); + tmp++; + *tmp = htonl(sz - footSz - 1); + } + + dataBuffer.updateWritePtr(sz); return true; }//if return false; @@ -3864,8 +3930,13 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) return; }//if + BackupRecordPtr ptr LINT_SET_PTR; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + OperationRecord & op = filePtr.p->operation; - if(!op.fragComplete(filePtr.p->tableId, filePtr.p->fragmentNo)) { + if(!op.fragComplete(filePtr.p->tableId, filePtr.p->fragmentNo, + c_defaults.m_o_direct)) + { jam(); signal->theData[0] = BackupContinueB::BUFFER_FULL_FRAG_COMPLETE; signal->theData[1] = filePtr.i; @@ -3875,9 +3946,6 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD; - BackupRecordPtr ptr LINT_SET_PTR; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - if (ptr.p->is_lcp()) { ptr.p->slaveState.setState(STOPPING); @@ -4914,6 +4982,8 @@ Backup::lcp_open_file(Signal* signal, BackupRecordPtr ptr) FsOpenReq::OM_CREATE | FsOpenReq::OM_APPEND | FsOpenReq::OM_AUTOSYNC; + if (c_defaults.m_o_direct) + req->fileFlags |= FsOpenReq::OM_DIRECT; FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF); req->auto_sync_size = c_defaults.m_disk_synch_size; diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.hpp b/storage/ndb/src/kernel/blocks/backup/Backup.hpp index 32f2e14ac92..3fd9b2967fd 100644 --- a/storage/ndb/src/kernel/blocks/backup/Backup.hpp +++ b/storage/ndb/src/kernel/blocks/backup/Backup.hpp @@ -240,7 +240,7 @@ public: * Once per fragment */ bool newFragment(Uint32 tableId, Uint32 fragNo); - bool fragComplete(Uint32 tableId, Uint32 fragNo); + bool fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record); /** * Once per scan frag (next) req/conf @@ -534,6 +534,7 @@ public: Uint32 
m_disk_write_speed; Uint32 m_disk_synch_size; Uint32 m_diskless; + Uint32 m_o_direct; }; /** diff --git a/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp b/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp index ace9dfe5c79..20f8f6650be 100644 --- a/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp +++ b/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp @@ -32,7 +32,8 @@ struct BackupFormat { TABLE_LIST = 4, TABLE_DESCRIPTION = 5, GCP_ENTRY = 6, - FRAGMENT_INFO = 7 + FRAGMENT_INFO = 7, + EMPTY_ENTRY = 8 }; struct FileHeader { @@ -93,6 +94,13 @@ struct BackupFormat { Uint32 NoOfRecords; Uint32 Checksum; }; + + /* optional padding for O_DIRECT */ + struct EmptyEntry { + Uint32 SectionType; + Uint32 SectionLength; + /* not used data */ + }; }; /** diff --git a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp index 4faa02e494f..2cd2a8a2bee 100644 --- a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp +++ b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp @@ -148,10 +148,13 @@ Backup::execREAD_CONFIG_REQ(Signal* signal) c_defaults.m_disk_write_speed = 10 * (1024 * 1024); c_defaults.m_disk_write_speed_sr = 100 * (1024 * 1024); c_defaults.m_disk_synch_size = 4 * (1024 * 1024); - + c_defaults.m_o_direct = true; + Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0; ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &c_defaults.m_diskless)); + ndb_mgm_get_int_parameter(p, CFG_DB_O_DIRECT, + &c_defaults.m_o_direct); ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED_SR, &c_defaults.m_disk_write_speed_sr); ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED, @@ -204,7 +207,7 @@ Backup::execREAD_CONFIG_REQ(Signal* signal) / sizeof(Page32); // We need to allocate an additional of 2 pages. 1 page because of a bug in // ArrayPool and another one for DICTTAINFO. 
- c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2); + c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2, true); { // Init all tables SLList<Table> tables(c_tablePool); diff --git a/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp b/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp index d26f36ccf40..bb0bbd6d770 100644 --- a/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp +++ b/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp @@ -270,8 +270,8 @@ FsBuffer::getReadPtr(Uint32 ** ptr, Uint32 * sz, bool * _eof){ * ptr = &Tp[Tr]; - DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> %d", - Tr, Tw, Ts, Tm, sz1, * sz)); + DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> %d", + Tr, Tmw, Ts, Tm, sz1, * sz)); return true; } @@ -279,8 +279,8 @@ FsBuffer::getReadPtr(Uint32 ** ptr, Uint32 * sz, bool * _eof){ if(!m_eof){ * _eof = false; - DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> false", - Tr, Tw, Ts, Tm, sz1)); + DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> false", + Tr, Tmw, Ts, Tm, sz1)); return false; } @@ -289,8 +289,8 @@ FsBuffer::getReadPtr(Uint32 ** ptr, Uint32 * sz, bool * _eof){ * _eof = true; * ptr = &Tp[Tr]; - DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> %d eof", - Tr, Tw, Ts, Tm, sz1, * sz)); + DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> %d eof", + Tr, Tmw, Ts, Tm, sz1, * sz)); return false; } @@ -316,13 +316,13 @@ FsBuffer::getWritePtr(Uint32 ** ptr, Uint32 sz){ if(sz1 > sz){ // Note at least 1 word of slack * ptr = &Tp[Tw]; - DEBUG(ndbout_c("getWritePtr(%d) Tr: %d Tw: %d Ts: %d sz1: %d -> true", - sz, Tr, Tw, Ts, sz1)); + DEBUG(ndbout_c("getWritePtr(%d) Tw: %d sz1: %d -> true", + sz, Tw, sz1)); return true; } - DEBUG(ndbout_c("getWritePtr(%d) Tr: %d Tw: %d Ts: %d sz1: %d -> false", - sz, Tr, Tw, Ts, sz1)); + DEBUG(ndbout_c("getWritePtr(%d) Tw: %d sz1: %d -> false", + sz, Tw, sz1)); return false; } @@ -339,11 +339,15 @@ FsBuffer::updateWritePtr(Uint32 sz){ m_free -= sz; if(Tnew < Ts){ m_writeIndex = Tnew; + DEBUG(ndbout_c("updateWritePtr(%d) m_writeIndex: %d", + sz, m_writeIndex)); return; } memcpy(Tp, &Tp[Ts], (Tnew - Ts) << 2); m_writeIndex = Tnew - Ts; + DEBUG(ndbout_c("updateWritePtr(%d) m_writeIndex: %d", + sz, m_writeIndex)); } inline diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index ac3acdc6778..edc8c0131db 100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -698,6 +698,9 @@ void Dbdict::execFSCLOSECONF(Signal* signal) case FsConnectRecord::OPEN_READ_SCHEMA2: openSchemaFile(signal, 1, fsPtr.i, false, false); break; + case FsConnectRecord::OPEN_READ_TAB_FILE2: + openTableFile(signal, 1, fsPtr.i, c_readTableRecord.tableId, false); + break; default: jamLine((fsPtr.p->fsState & 0xFFF)); ndbrequire(false); @@ -1073,8 +1076,11 @@ void Dbdict::readTableConf(Signal* signal, void Dbdict::readTableRef(Signal* signal, FsConnectRecordPtr fsPtr) { + /** + * First close corrupt file + */ fsPtr.p->fsState = FsConnectRecord::OPEN_READ_TAB_FILE2; - openTableFile(signal, 1, fsPtr.i, c_readTableRecord.tableId, false); + closeFile(signal, fsPtr.p->filePtr, fsPtr.i); return; }//Dbdict::readTableRef() diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 1fe932aaae8..bc14eec1f98 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ 
b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -4741,12 +4741,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) jam(); const Uint32 nodeId = failedNodePtr.i; - if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){ + if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i)) + { /*----------------------------------------------------*/ /* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */ /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */ /* THE NODE HAVE MISSED A LOCAL CHECKPOINT. */ /*----------------------------------------------------*/ + + /** + * Bug#28717, Only master should do this, as this status is copied + * to other nodes + */ switch (failedNodePtr.p->activeStatus) { case Sysfile::NS_Active: jam(); diff --git a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index ba146fce005..64d214d472b 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -71,7 +71,6 @@ class Dbtup; /* CONSTANTS OF THE LOG PAGES */ /* ------------------------------------------------------------------------- */ #define ZPAGE_HEADER_SIZE 32 -#define ZNO_MBYTES_IN_FILE 16 #define ZPAGE_SIZE 8192 #define ZPAGES_IN_MBYTE 32 #define ZTWOLOG_NO_PAGES_IN_MBYTE 5 @@ -115,9 +114,6 @@ class Dbtup; /* ------------------------------------------------------------------------- */ /* VARIOUS CONSTANTS USED AS FLAGS TO THE FILE MANAGER. */ /* ------------------------------------------------------------------------- */ -#define ZOPEN_READ 0 -#define ZOPEN_WRITE 1 -#define ZOPEN_READ_WRITE 2 #define ZVAR_NO_LOG_PAGE_WORD 1 #define ZLIST_OF_PAIRS 0 #define ZLIST_OF_PAIRS_SYNCH 16 @@ -142,7 +138,7 @@ class Dbtup; /* IN THE MBYTE. */ /* ------------------------------------------------------------------------- */ #define ZFD_HEADER_SIZE 3 -#define ZFD_PART_SIZE 48 +#define ZFD_MBYTE_SIZE 3 #define ZLOG_HEAD_SIZE 8 #define ZNEXT_LOG_SIZE 2 #define ZABORT_LOG_SIZE 3 @@ -169,7 +165,6 @@ class Dbtup; #define ZPOS_LOG_TYPE 0 #define ZPOS_NO_FD 1 #define ZPOS_FILE_NO 2 -#define ZMAX_LOG_FILES_IN_PAGE_ZERO 40 /* ------------------------------------------------------------------------- */ /* THE POSITIONS WITHIN A PREPARE LOG RECORD AND A NEW PREPARE */ /* LOG RECORD. */ @@ -1437,17 +1432,17 @@ public: * header of each log file. That information is used during * system restart to find the tail of the log. */ - UintR logLastPrepRef[16]; + UintR *logLastPrepRef; /** * The max global checkpoint completed before the mbyte in the * log file was started. One variable per mbyte. */ - UintR logMaxGciCompleted[16]; + UintR *logMaxGciCompleted; /** * The max global checkpoint started before the mbyte in the log * file was started. One variable per mbyte. */ - UintR logMaxGciStarted[16]; + UintR *logMaxGciStarted; /** * This variable contains the file name as needed by the file * system when opening the file. 
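Editor's note: the Dblqh.hpp hunks above make the redo log file size configurable — the fixed ZNO_MBYTES_IN_FILE (16) and ZFD_PART_SIZE (48) constants are removed, the three per-megabyte metadata arrays become heap pointers sized by the new clogFileSize member, and the hard-coded ZMAX_LOG_FILES_IN_PAGE_ZERO (40) gives way to the computed cmaxLogFilesInPageZero. A small sketch of the page-zero file-descriptor layout that the later writeFileHeaderOpen / initGciInLogFileRec hunks compute; the constants come from the patch, the helper function itself is hypothetical.

    #include <cstdint>
    using Uint32 = std::uint32_t;   // stand-in for NDB's Uint32 typedef

    // Each file descriptor written to page zero holds ZFD_MBYTE_SIZE (= 3)
    // words per megabyte of the log file -- logMaxGciCompleted,
    // logMaxGciStarted and logLastPrepRef -- stored as three consecutive
    // clogFileSize-word arrays after the page and FD headers.
    struct FdWordPos { Uint32 maxGciCompleted, maxGciStarted, lastPrepRef; };

    static FdWordPos fd_word_pos(Uint32 fd, Uint32 mb, Uint32 clogFileSize)
    {
      const Uint32 ZPAGE_HEADER_SIZE = 32;
      const Uint32 ZFD_HEADER_SIZE   = 3;
      const Uint32 ZFD_MBYTE_SIZE    = 3;
      Uint32 pos0 = ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE
                  + fd * (ZFD_MBYTE_SIZE * clogFileSize) + mb;
      FdWordPos p = { pos0, pos0 + clogFileSize, pos0 + 2 * clogFileSize };
      return p;
    }
    // With the old fixed 16 MB files this collapses to the removed
    // ZFD_PART_SIZE = 48 (= 3 * 16) words per descriptor, and it is why the
    // patch computes cmaxLogFilesInPageZero as
    // (ZPAGE_SIZE - ZPAGE_HEADER_SIZE - 128) / (ZFD_MBYTE_SIZE * clogFileSize),
    // capped at the old limit of 40 so all descriptors still fit in one page.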
@@ -2163,6 +2158,7 @@ private: void execSTART_RECREF(Signal* signal); void execGCP_SAVEREQ(Signal* signal); + void execFSOPENREF(Signal* signal); void execFSOPENCONF(Signal* signal); void execFSCLOSECONF(Signal* signal); void execFSWRITECONF(Signal* signal); @@ -2671,6 +2667,8 @@ private: LogPartRecord *logPartRecord; LogPartRecordPtr logPartPtr; UintR clogPartFileSize; + Uint32 clogFileSize; // In MBYTE + Uint32 cmaxLogFilesInPageZero; // // Configurable LogFileRecord *logFileRecord; @@ -2678,13 +2676,15 @@ private: UintR cfirstfreeLogFile; UintR clogFileFileSize; -#define ZLFO_FILE_SIZE 256 /* MAX 256 OUTSTANDING FILE OPERATIONS */ +#define ZLFO_MIN_FILE_SIZE 256 +// RedoBuffer/32K minimum ZLFO_MIN_FILE_SIZE LogFileOperationRecord *logFileOperationRecord; LogFileOperationRecordPtr lfoPtr; UintR cfirstfreeLfo; UintR clfoFileSize; LogPageRecord *logPageRecord; + void *logPageRecordUnaligned; LogPageRecordPtr logPagePtr; UintR cfirstfreeLogPage; UintR clogPageFileSize; @@ -2695,7 +2695,7 @@ private: UintR cfirstfreePageRef; UintR cpageRefFileSize; -#define ZSCANREC_FILE_SIZE 100 +// Configurable ArrayPool<ScanRecord> c_scanRecordPool; ScanRecordPtr scanptr; UintR cscanNoFreeRec; @@ -2888,6 +2888,7 @@ private: UintR ctransidHash[1024]; Uint32 c_diskless; + Uint32 c_o_direct; Uint32 c_error_insert_table_id; public: diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp index c054c227c8e..d6411ee1cb9 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp @@ -30,11 +30,11 @@ void Dblqh::initData() cgcprecFileSize = ZGCPREC_FILE_SIZE; chostFileSize = MAX_NDB_NODES; clcpFileSize = ZNO_CONCURRENT_LCP; - clfoFileSize = ZLFO_FILE_SIZE; + clfoFileSize = 0; clogFileFileSize = 0; clogPartFileSize = ZLOG_PART_FILE_SIZE; cpageRefFileSize = ZPAGE_REF_FILE_SIZE; - cscanrecFileSize = ZSCANREC_FILE_SIZE; + cscanrecFileSize = 0; ctabrecFileSize = 0; ctcConnectrecFileSize = 0; ctcNodeFailrecFileSize = MAX_NDB_NODES; @@ -49,6 +49,7 @@ void Dblqh::initData() logFileRecord = 0; logFileOperationRecord = 0; logPageRecord = 0; + logPageRecordUnaligned= 0; pageRefRecord = 0; tablerec = 0; tcConnectionrec = 0; @@ -60,6 +61,8 @@ void Dblqh::initData() cLqhTimeOutCheckCount = 0; cbookedAccOps = 0; m_backup_ptr = RNIL; + clogFileSize = 16; + cmaxLogFilesInPageZero = 40; }//Dblqh::initData() void Dblqh::initRecords() @@ -105,10 +108,13 @@ void Dblqh::initRecords() sizeof(LogFileOperationRecord), clfoFileSize); - logPageRecord = (LogPageRecord*)allocRecord("LogPageRecord", - sizeof(LogPageRecord), - clogPageFileSize, - false); + logPageRecord = + (LogPageRecord*)allocRecordAligned("LogPageRecord", + sizeof(LogPageRecord), + clogPageFileSize, + &logPageRecordUnaligned, + NDB_O_DIRECT_WRITE_ALIGNMENT, + false); pageRefRecord = (PageRefRecord*)allocRecord("PageRefRecord", sizeof(PageRefRecord), @@ -260,6 +266,7 @@ Dblqh::Dblqh(Block_context& ctx): addRecSignal(GSN_START_FRAGREQ, &Dblqh::execSTART_FRAGREQ); addRecSignal(GSN_START_RECREF, &Dblqh::execSTART_RECREF); addRecSignal(GSN_GCP_SAVEREQ, &Dblqh::execGCP_SAVEREQ); + addRecSignal(GSN_FSOPENREF, &Dblqh::execFSOPENREF, true); addRecSignal(GSN_FSOPENCONF, &Dblqh::execFSOPENCONF); addRecSignal(GSN_FSCLOSECONF, &Dblqh::execFSCLOSECONF); addRecSignal(GSN_FSWRITECONF, &Dblqh::execFSWRITECONF); @@ -377,7 +384,7 @@ Dblqh::~Dblqh() sizeof(LogFileOperationRecord), clfoFileSize); - deallocRecord((void**)&logPageRecord, + 
deallocRecord((void**)&logPageRecordUnaligned, "LogPageRecord", sizeof(LogPageRecord), clogPageFileSize); diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 2ffed9749b8..8f42a8039d8 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -1023,6 +1023,11 @@ void Dblqh::execREAD_CONFIG_REQ(Signal* signal) clogPageFileSize+= (16 - mega_byte_part); } + /* maximum number of log file operations */ + clfoFileSize = clogPageFileSize; + if (clfoFileSize < ZLFO_MIN_FILE_SIZE) + clfoFileSize = ZLFO_MIN_FILE_SIZE; + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_TABLE, &ctabrecFileSize)); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_TC_CONNECT, &ctcConnectrecFileSize)); @@ -1031,14 +1036,44 @@ void Dblqh::execREAD_CONFIG_REQ(Signal* signal) cmaxAccOps = cscanrecFileSize * MAX_PARALLEL_OP_PER_SCAN; ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &c_diskless)); + c_o_direct = true; + ndb_mgm_get_int_parameter(p, CFG_DB_O_DIRECT, &c_o_direct); Uint32 tmp= 0; ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_FRAG, &tmp)); c_fragment_pool.setSize(tmp); + if (!ndb_mgm_get_int_parameter(p, CFG_DB_REDOLOG_FILE_SIZE, + &clogFileSize)) + { + // convert to mbyte + clogFileSize = (clogFileSize + 1024*1024 - 1) / (1024 * 1024); + ndbrequire(clogFileSize >= 4 && clogFileSize <= 1024); + } + + cmaxLogFilesInPageZero = (ZPAGE_SIZE - ZPAGE_HEADER_SIZE - 128) / + (ZFD_MBYTE_SIZE * clogFileSize); + + /** + * "Old" cmaxLogFilesInPageZero was 40 + * Each FD need 3 words per mb, require that they can fit into 1 page + * (atleast 1 FD) + * Is also checked in ConfigInfo.cpp (max FragmentLogFileSize = 1Gb) + * 1Gb = 1024Mb => 3(ZFD_MBYTE_SIZE) * 1024 < 8192 (ZPAGE_SIZE) + */ + if (cmaxLogFilesInPageZero > 40) + { + jam(); + cmaxLogFilesInPageZero = 40; + } + else + { + ndbrequire(cmaxLogFilesInPageZero); + } + initRecords(); initialiseRecordsLab(signal, 0, ref, senderData); - + return; }//Dblqh::execSIZEALT_REP() @@ -11788,9 +11823,9 @@ void Dblqh::sendStartLcp(Signal* signal) Uint32 Dblqh::remainingLogSize(const LogFileRecordPtr &sltCurrLogFilePtr, const LogPartRecordPtr &sltLogPartPtr) { - Uint32 hf = sltCurrLogFilePtr.p->fileNo*ZNO_MBYTES_IN_FILE+sltCurrLogFilePtr.p->currentMbyte; - Uint32 tf = sltLogPartPtr.p->logTailFileNo*ZNO_MBYTES_IN_FILE+sltLogPartPtr.p->logTailMbyte; - Uint32 sz = sltLogPartPtr.p->noLogFiles*ZNO_MBYTES_IN_FILE; + Uint32 hf = sltCurrLogFilePtr.p->fileNo*clogFileSize+sltCurrLogFilePtr.p->currentMbyte; + Uint32 tf = sltLogPartPtr.p->logTailFileNo*clogFileSize+sltLogPartPtr.p->logTailMbyte; + Uint32 sz = sltLogPartPtr.p->noLogFiles*clogFileSize; if (tf > hf) hf += sz; return sz-(hf-tf); } @@ -11848,7 +11883,7 @@ void Dblqh::setLogTail(Signal* signal, Uint32 keepGci) /* ------------------------------------------------------------------------- */ SLT_LOOP: for (tsltIndex = tsltStartMbyte; - tsltIndex <= ZNO_MBYTES_IN_FILE - 1; + tsltIndex <= clogFileSize - 1; tsltIndex++) { if (sltLogFilePtr.p->logMaxGciStarted[tsltIndex] >= keepGci) { /* ------------------------------------------------------------------------- */ @@ -11864,7 +11899,7 @@ void Dblqh::setLogTail(Signal* signal, Uint32 keepGci) /* ------------------------------------------------------------------------- */ /*STEPPING BACK INCLUDES ALSO STEPPING BACK TO THE PREVIOUS LOG FILE. 
*/ /* ------------------------------------------------------------------------- */ - tsltMbyte = ZNO_MBYTES_IN_FILE - 1; + tsltMbyte = clogFileSize - 1; sltLogFilePtr.i = sltLogFilePtr.p->prevLogFile; ptrCheckGuard(sltLogFilePtr, clogFileFileSize, logFileRecord); }//if @@ -11902,7 +11937,7 @@ void Dblqh::setLogTail(Signal* signal, Uint32 keepGci) UintR ToldTailFileNo = sltLogPartPtr.p->logTailFileNo; UintR ToldTailMByte = sltLogPartPtr.p->logTailMbyte; - arrGuard(tsltMbyte, 16); + arrGuard(tsltMbyte, clogFileSize); sltLogPartPtr.p->logTailFileNo = sltLogFilePtr.p->logLastPrepRef[tsltMbyte] >> 16; /* ------------------------------------------------------------------------- */ @@ -12402,6 +12437,26 @@ void Dblqh::execFSOPENCONF(Signal* signal) }//switch }//Dblqh::execFSOPENCONF() +void +Dblqh::execFSOPENREF(Signal* signal) +{ + jamEntry(); + FsRef* ref = (FsRef*)signal->getDataPtr(); + Uint32 err = ref->errorCode; + if (err == FsRef::fsErrInvalidFileSize) + { + char buf[256]; + BaseString::snprintf(buf, sizeof(buf), + "Invalid file size for redo logfile, " + " size only changable with --initial"); + progError(__LINE__, + NDBD_EXIT_INVALID_CONFIG, + buf); + return; + } + + SimulatedBlock::execFSOPENREF(signal); +} /* ************>> */ /* FSREADCONF > */ @@ -13047,7 +13102,7 @@ void Dblqh::openFileInitLab(Signal* signal) { logFilePtr.p->logFileStatus = LogFileRecord::OPEN_INIT; seizeLogpage(signal); - writeSinglePage(signal, (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE) - 1, + writeSinglePage(signal, (clogFileSize * ZPAGES_IN_MBYTE) - 1, ZPAGE_SIZE - 1, __LINE__); lfoPtr.p->lfoState = LogFileOperationRecord::INIT_WRITE_AT_END; return; @@ -13110,7 +13165,7 @@ void Dblqh::writeInitMbyteLab(Signal* signal) { releaseLfo(signal); logFilePtr.p->currentMbyte = logFilePtr.p->currentMbyte + 1; - if (logFilePtr.p->currentMbyte == ZNO_MBYTES_IN_FILE) { + if (logFilePtr.p->currentMbyte == clogFileSize) { jam(); releaseLogpage(signal); logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_INIT; @@ -13230,7 +13285,7 @@ void Dblqh::initLogfile(Signal* signal, Uint32 fileNo) logFilePtr.p->lastPageWritten = 0; logFilePtr.p->logPageZero = RNIL; logFilePtr.p->currentMbyte = 0; - for (tilIndex = 0; tilIndex <= 15; tilIndex++) { + for (tilIndex = 0; tilIndex < clogFileSize; tilIndex++) { logFilePtr.p->logMaxGciCompleted[tilIndex] = (UintR)-1; logFilePtr.p->logMaxGciStarted[tilIndex] = (UintR)-1; logFilePtr.p->logLastPrepRef[tilIndex] = 0; @@ -13281,8 +13336,14 @@ void Dblqh::openFileRw(Signal* signal, LogFileRecordPtr olfLogFilePtr) signal->theData[3] = olfLogFilePtr.p->fileName[1]; signal->theData[4] = olfLogFilePtr.p->fileName[2]; signal->theData[5] = olfLogFilePtr.p->fileName[3]; - signal->theData[6] = ZOPEN_READ_WRITE | FsOpenReq::OM_AUTOSYNC; + signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_AUTOSYNC | FsOpenReq::OM_CHECK_SIZE; + if (c_o_direct) + signal->theData[6] |= FsOpenReq::OM_DIRECT; req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord); + Uint64 sz = clogFileSize; + sz *= 1024; sz *= 1024; + req->file_size_hi = sz >> 32; + req->file_size_lo = sz & 0xFFFFFFFF; sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); }//Dblqh::openFileRw() @@ -13301,7 +13362,9 @@ void Dblqh::openLogfileInit(Signal* signal) signal->theData[3] = logFilePtr.p->fileName[1]; signal->theData[4] = logFilePtr.p->fileName[2]; signal->theData[5] = logFilePtr.p->fileName[3]; - signal->theData[6] = 0x302 | FsOpenReq::OM_AUTOSYNC; + signal->theData[6] = FsOpenReq::OM_READWRITE | 
FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE | FsOpenReq::OM_AUTOSYNC; + if (c_o_direct) + signal->theData[6] |= FsOpenReq::OM_DIRECT; req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord); sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); }//Dblqh::openLogfileInit() @@ -13337,8 +13400,14 @@ void Dblqh::openNextLogfile(Signal* signal) signal->theData[3] = onlLogFilePtr.p->fileName[1]; signal->theData[4] = onlLogFilePtr.p->fileName[2]; signal->theData[5] = onlLogFilePtr.p->fileName[3]; - signal->theData[6] = 2 | FsOpenReq::OM_AUTOSYNC; + signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_AUTOSYNC | FsOpenReq::OM_CHECK_SIZE; + if (c_o_direct) + signal->theData[6] |= FsOpenReq::OM_DIRECT; req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord); + Uint64 sz = clogFileSize; + sz *= 1024; sz *= 1024; + req->file_size_hi = sz >> 32; + req->file_size_lo = sz & 0xFFFFFFFF; sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); }//if }//Dblqh::openNextLogfile() @@ -13469,7 +13538,7 @@ void Dblqh::writeFileDescriptor(Signal* signal) /* -------------------------------------------------- */ /* START BY WRITING TO LOG FILE RECORD */ /* -------------------------------------------------- */ - arrGuard(logFilePtr.p->currentMbyte, 16); + arrGuard(logFilePtr.p->currentMbyte, clogFileSize); logFilePtr.p->logMaxGciCompleted[logFilePtr.p->currentMbyte] = logPartPtr.p->logPartNewestCompletedGCI; logFilePtr.p->logMaxGciStarted[logFilePtr.p->currentMbyte] = cnewestGci; @@ -13495,10 +13564,7 @@ void Dblqh::writeFileDescriptor(Signal* signal) /* ------------------------------------------------------------------------- */ void Dblqh::writeFileHeaderOpen(Signal* signal, Uint32 wmoType) { - LogFileRecordPtr wmoLogFilePtr; UintR twmoNoLogDescriptors; - UintR twmoLoop; - UintR twmoIndex; /* -------------------------------------------------- */ /* WRITE HEADER INFORMATION IN THE NEW FILE. 
*/ @@ -13506,52 +13572,44 @@ void Dblqh::writeFileHeaderOpen(Signal* signal, Uint32 wmoType) logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_LOG_TYPE] = ZFD_TYPE; logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] = logFilePtr.p->fileNo; - if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { + if (logPartPtr.p->noLogFiles > cmaxLogFilesInPageZero) { jam(); - twmoNoLogDescriptors = ZMAX_LOG_FILES_IN_PAGE_ZERO; + twmoNoLogDescriptors = cmaxLogFilesInPageZero; } else { jam(); twmoNoLogDescriptors = logPartPtr.p->noLogFiles; }//if logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD] = twmoNoLogDescriptors; - wmoLogFilePtr.i = logFilePtr.i; - twmoLoop = 0; -WMO_LOOP: - jam(); - if (twmoLoop < twmoNoLogDescriptors) { - jam(); - ptrCheckGuard(wmoLogFilePtr, clogFileFileSize, logFileRecord); - for (twmoIndex = 0; twmoIndex <= ZNO_MBYTES_IN_FILE - 1; twmoIndex++) { - jam(); - arrGuard(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (twmoLoop * ZFD_PART_SIZE)) + twmoIndex, ZPAGE_SIZE); - logPagePtr.p->logPageWord[((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (twmoLoop * ZFD_PART_SIZE)) + twmoIndex] = - wmoLogFilePtr.p->logMaxGciCompleted[twmoIndex]; - arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (twmoLoop * ZFD_PART_SIZE)) + ZNO_MBYTES_IN_FILE) + - twmoIndex, ZPAGE_SIZE); - logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (twmoLoop * ZFD_PART_SIZE)) + ZNO_MBYTES_IN_FILE) + twmoIndex] = - wmoLogFilePtr.p->logMaxGciStarted[twmoIndex]; - arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (twmoLoop * ZFD_PART_SIZE)) + (2 * ZNO_MBYTES_IN_FILE)) + - twmoIndex, ZPAGE_SIZE); - logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (twmoLoop * ZFD_PART_SIZE)) + (2 * ZNO_MBYTES_IN_FILE)) + twmoIndex] = - wmoLogFilePtr.p->logLastPrepRef[twmoIndex]; - }//for - wmoLogFilePtr.i = wmoLogFilePtr.p->prevLogFile; - twmoLoop = twmoLoop + 1; - goto WMO_LOOP; - }//if - logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = - (ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (ZFD_PART_SIZE * twmoNoLogDescriptors); - arrGuard(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX], ZPAGE_SIZE); - logPagePtr.p->logPageWord[logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]] = - ZNEXT_LOG_RECORD_TYPE; + + { + Uint32 pos = ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE; + LogFileRecordPtr filePtr = logFilePtr; + for (Uint32 fd = 0; fd < twmoNoLogDescriptors; fd++) + { + jam(); + ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord); + for (Uint32 mb = 0; mb < clogFileSize; mb ++) + { + jam(); + Uint32 pos0 = pos + fd * (ZFD_MBYTE_SIZE * clogFileSize) + mb; + Uint32 pos1 = pos0 + clogFileSize; + Uint32 pos2 = pos1 + clogFileSize; + arrGuard(pos0, ZPAGE_SIZE); + arrGuard(pos1, ZPAGE_SIZE); + arrGuard(pos2, ZPAGE_SIZE); + logPagePtr.p->logPageWord[pos0] = filePtr.p->logMaxGciCompleted[mb]; + logPagePtr.p->logPageWord[pos1] = filePtr.p->logMaxGciStarted[mb]; + logPagePtr.p->logPageWord[pos2] = filePtr.p->logLastPrepRef[mb]; + } + filePtr.i = filePtr.p->prevLogFile; + } + pos += (twmoNoLogDescriptors * ZFD_MBYTE_SIZE * clogFileSize); + arrGuard(pos, ZPAGE_SIZE); + logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = pos; + logPagePtr.p->logPageWord[pos] = ZNEXT_LOG_RECORD_TYPE; + } + /* ------------------------------------------------------- */ /* THIS IS A SPECIAL WRITE OF THE FIRST PAGE IN THE */ /* LOG FILE. 
THIS HAS SPECIAL SIGNIFANCE TO FIND */ @@ -13696,9 +13754,9 @@ void Dblqh::openSrLastFileLab(Signal* signal) void Dblqh::readSrLastFileLab(Signal* signal) { logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP]; - if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { + if (logPartPtr.p->noLogFiles > cmaxLogFilesInPageZero) { jam(); - initGciInLogFileRec(signal, ZMAX_LOG_FILES_IN_PAGE_ZERO); + initGciInLogFileRec(signal, cmaxLogFilesInPageZero); } else { jam(); initGciInLogFileRec(signal, logPartPtr.p->noLogFiles); @@ -13723,7 +13781,7 @@ void Dblqh::readSrLastMbyteLab(Signal* signal) logPartPtr.p->lastMbyte = logFilePtr.p->currentMbyte - 1; }//if }//if - arrGuard(logFilePtr.p->currentMbyte, 16); + arrGuard(logFilePtr.p->currentMbyte, clogFileSize); logFilePtr.p->logMaxGciCompleted[logFilePtr.p->currentMbyte] = logPagePtr.p->logPageWord[ZPOS_MAX_GCI_COMPLETED]; logFilePtr.p->logMaxGciStarted[logFilePtr.p->currentMbyte] = @@ -13731,7 +13789,7 @@ void Dblqh::readSrLastMbyteLab(Signal* signal) logFilePtr.p->logLastPrepRef[logFilePtr.p->currentMbyte] = logPagePtr.p->logPageWord[ZLAST_LOG_PREP_REF]; releaseLogpage(signal); - if (logFilePtr.p->currentMbyte < (ZNO_MBYTES_IN_FILE - 1)) { + if (logFilePtr.p->currentMbyte < (clogFileSize - 1)) { jam(); logFilePtr.p->currentMbyte++; readSinglePage(signal, ZPAGES_IN_MBYTE * logFilePtr.p->currentMbyte); @@ -13745,21 +13803,21 @@ void Dblqh::readSrLastMbyteLab(Signal* signal) * ---------------------------------------------------------------------- */ if (logPartPtr.p->lastMbyte == ZNIL) { jam(); - logPartPtr.p->lastMbyte = ZNO_MBYTES_IN_FILE - 1; + logPartPtr.p->lastMbyte = clogFileSize - 1; }//if }//if logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR; closeFile(signal, logFilePtr, __LINE__); - if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { + if (logPartPtr.p->noLogFiles > cmaxLogFilesInPageZero) { Uint32 fileNo; - if (logFilePtr.p->fileNo >= ZMAX_LOG_FILES_IN_PAGE_ZERO) { + if (logFilePtr.p->fileNo >= cmaxLogFilesInPageZero) { jam(); - fileNo = logFilePtr.p->fileNo - ZMAX_LOG_FILES_IN_PAGE_ZERO; + fileNo = logFilePtr.p->fileNo - cmaxLogFilesInPageZero; } else { jam(); fileNo = (logPartPtr.p->noLogFiles + logFilePtr.p->fileNo) - - ZMAX_LOG_FILES_IN_PAGE_ZERO; + cmaxLogFilesInPageZero; }//if if (fileNo == 0) { jam(); @@ -13769,11 +13827,11 @@ void Dblqh::readSrLastMbyteLab(Signal* signal) * -------------------------------------------------------------------- */ fileNo = 1; logPartPtr.p->srRemainingFiles = - logPartPtr.p->noLogFiles - (ZMAX_LOG_FILES_IN_PAGE_ZERO - 1); + logPartPtr.p->noLogFiles - (cmaxLogFilesInPageZero - 1); } else { jam(); logPartPtr.p->srRemainingFiles = - logPartPtr.p->noLogFiles - ZMAX_LOG_FILES_IN_PAGE_ZERO; + logPartPtr.p->noLogFiles - cmaxLogFilesInPageZero; }//if LogFileRecordPtr locLogFilePtr; findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr); @@ -13798,9 +13856,9 @@ void Dblqh::openSrNextFileLab(Signal* signal) void Dblqh::readSrNextFileLab(Signal* signal) { - if (logPartPtr.p->srRemainingFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { + if (logPartPtr.p->srRemainingFiles > cmaxLogFilesInPageZero) { jam(); - initGciInLogFileRec(signal, ZMAX_LOG_FILES_IN_PAGE_ZERO); + initGciInLogFileRec(signal, cmaxLogFilesInPageZero); } else { jam(); initGciInLogFileRec(signal, logPartPtr.p->srRemainingFiles); @@ -13808,16 +13866,16 @@ void Dblqh::readSrNextFileLab(Signal* signal) releaseLogpage(signal); logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR; closeFile(signal, logFilePtr, __LINE__); - if 
(logPartPtr.p->srRemainingFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { + if (logPartPtr.p->srRemainingFiles > cmaxLogFilesInPageZero) { Uint32 fileNo; - if (logFilePtr.p->fileNo >= ZMAX_LOG_FILES_IN_PAGE_ZERO) { + if (logFilePtr.p->fileNo >= cmaxLogFilesInPageZero) { jam(); - fileNo = logFilePtr.p->fileNo - ZMAX_LOG_FILES_IN_PAGE_ZERO; + fileNo = logFilePtr.p->fileNo - cmaxLogFilesInPageZero; } else { jam(); fileNo = (logPartPtr.p->noLogFiles + logFilePtr.p->fileNo) - - ZMAX_LOG_FILES_IN_PAGE_ZERO; + cmaxLogFilesInPageZero; }//if if (fileNo == 0) { jam(); @@ -13826,11 +13884,11 @@ void Dblqh::readSrNextFileLab(Signal* signal) * -------------------------------------------------------------------- */ fileNo = 1; logPartPtr.p->srRemainingFiles = - logPartPtr.p->srRemainingFiles - (ZMAX_LOG_FILES_IN_PAGE_ZERO - 1); + logPartPtr.p->srRemainingFiles - (cmaxLogFilesInPageZero - 1); } else { jam(); logPartPtr.p->srRemainingFiles = - logPartPtr.p->srRemainingFiles - ZMAX_LOG_FILES_IN_PAGE_ZERO; + logPartPtr.p->srRemainingFiles - cmaxLogFilesInPageZero; }//if LogFileRecordPtr locLogFilePtr; findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr); @@ -14701,7 +14759,7 @@ void Dblqh::srLogLimits(Signal* signal) * EXECUTED. * ----------------------------------------------------------------------- */ while(true) { - ndbrequire(tmbyte < 16); + ndbrequire(tmbyte < clogFileSize); if (logPartPtr.p->logExecState == LogPartRecord::LES_SEARCH_STOP) { if (logFilePtr.p->logMaxGciCompleted[tmbyte] < logPartPtr.p->logLastGci) { jam(); @@ -14742,7 +14800,7 @@ void Dblqh::srLogLimits(Signal* signal) if (logPartPtr.p->logExecState != LogPartRecord::LES_EXEC_LOG) { if (tmbyte == 0) { jam(); - tmbyte = ZNO_MBYTES_IN_FILE - 1; + tmbyte = clogFileSize - 1; logFilePtr.i = logFilePtr.p->prevLogFile; ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); } else { @@ -15136,7 +15194,7 @@ void Dblqh::execSr(Signal* signal) logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD]; logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = (ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (noFdDescriptors * ZFD_PART_SIZE); + (noFdDescriptors * ZFD_MBYTE_SIZE * clogFileSize); } break; /* ========================================================================= */ @@ -15176,11 +15234,11 @@ void Dblqh::execSr(Signal* signal) /*---------------------------------------------------------------------------*/ /* START EXECUTION OF A NEW MBYTE IN THE LOG. */ /*---------------------------------------------------------------------------*/ - if (logFilePtr.p->currentMbyte < (ZNO_MBYTES_IN_FILE - 1)) { + if (logFilePtr.p->currentMbyte < (clogFileSize - 1)) { jam(); logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_NEW_MBYTE; } else { - ndbrequire(logFilePtr.p->currentMbyte == (ZNO_MBYTES_IN_FILE - 1)); + ndbrequire(logFilePtr.p->currentMbyte == (clogFileSize - 1)); jam(); /*---------------------------------------------------------------------------*/ /* WE HAVE TO CHANGE FILE. CLOSE THIS ONE AND THEN OPEN THE NEXT. */ @@ -15375,7 +15433,7 @@ void Dblqh::invalidateLogAfterLastGCI(Signal* signal) { jam(); releaseLfo(signal); releaseLogpage(signal); - if (logPartPtr.p->invalidatePageNo < (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE - 1)) { + if (logPartPtr.p->invalidatePageNo < (clogFileSize * ZPAGES_IN_MBYTE - 1)) { // We continue in this file. 
logPartPtr.p->invalidatePageNo++; } else { @@ -16716,6 +16774,22 @@ void Dblqh::initialiseLogFile(Signal* signal) ptrAss(logFilePtr, logFileRecord); logFilePtr.p->nextLogFile = logFilePtr.i + 1; logFilePtr.p->logFileStatus = LogFileRecord::LFS_IDLE; + + logFilePtr.p->logLastPrepRef = new Uint32[clogFileSize]; + logFilePtr.p->logMaxGciCompleted = new Uint32[clogFileSize]; + logFilePtr.p->logMaxGciStarted = new Uint32[clogFileSize]; + + if (logFilePtr.p->logLastPrepRef == 0 || + logFilePtr.p->logMaxGciCompleted == 0 || + logFilePtr.p->logMaxGciStarted == 0) + { + char buf[256]; + BaseString::snprintf(buf, sizeof(buf), + "Failed to alloc mbyte(%u) arrays for logfile %u", + clogFileSize, logFilePtr.i); + progError(__LINE__, NDBD_EXIT_MEMALLOC, buf); + } + }//for logFilePtr.i = clogFileFileSize - 1; ptrAss(logFilePtr, logFileRecord); @@ -17044,41 +17118,31 @@ void Dblqh::initFragrec(Signal* signal, * ========================================================================= */ void Dblqh::initGciInLogFileRec(Signal* signal, Uint32 noFdDescriptors) { - LogFileRecordPtr iglLogFilePtr; - UintR tiglLoop; - UintR tiglIndex; - - tiglLoop = 0; - iglLogFilePtr.i = logFilePtr.i; - iglLogFilePtr.p = logFilePtr.p; -IGL_LOOP: - for (tiglIndex = 0; tiglIndex <= ZNO_MBYTES_IN_FILE - 1; tiglIndex++) { - arrGuard(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (tiglLoop * ZFD_PART_SIZE)) + tiglIndex, ZPAGE_SIZE); - iglLogFilePtr.p->logMaxGciCompleted[tiglIndex] = - logPagePtr.p->logPageWord[((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (tiglLoop * ZFD_PART_SIZE)) + tiglIndex]; - arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + ZNO_MBYTES_IN_FILE) + - (tiglLoop * ZFD_PART_SIZE)) + tiglIndex, ZPAGE_SIZE); - iglLogFilePtr.p->logMaxGciStarted[tiglIndex] = - logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - ZNO_MBYTES_IN_FILE) + - (tiglLoop * ZFD_PART_SIZE)) + tiglIndex]; - arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (2 * ZNO_MBYTES_IN_FILE)) + (tiglLoop * ZFD_PART_SIZE)) + - tiglIndex, ZPAGE_SIZE); - iglLogFilePtr.p->logLastPrepRef[tiglIndex] = - logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + - (2 * ZNO_MBYTES_IN_FILE)) + - (tiglLoop * ZFD_PART_SIZE)) + tiglIndex]; - }//for - tiglLoop = tiglLoop + 1; - if (tiglLoop < noFdDescriptors) { + LogFileRecordPtr filePtr = logFilePtr; + Uint32 pos = ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE; + for (Uint32 fd = 0; fd < noFdDescriptors; fd++) + { jam(); - iglLogFilePtr.i = iglLogFilePtr.p->prevLogFile; - ptrCheckGuard(iglLogFilePtr, clogFileFileSize, logFileRecord); - goto IGL_LOOP; - }//if + for (Uint32 mb = 0; mb < clogFileSize; mb++) + { + jam(); + Uint32 pos0 = pos + fd * (ZFD_MBYTE_SIZE * clogFileSize) + mb; + Uint32 pos1 = pos0 + clogFileSize; + Uint32 pos2 = pos1 + clogFileSize; + arrGuard(pos0, ZPAGE_SIZE); + arrGuard(pos1, ZPAGE_SIZE); + arrGuard(pos2, ZPAGE_SIZE); + filePtr.p->logMaxGciCompleted[mb] = logPagePtr.p->logPageWord[pos0]; + filePtr.p->logMaxGciStarted[mb] = logPagePtr.p->logPageWord[pos1]; + filePtr.p->logLastPrepRef[mb] = logPagePtr.p->logPageWord[pos2]; + } + if (fd + 1 < noFdDescriptors) + { + jam(); + filePtr.i = filePtr.p->prevLogFile; + ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord); + } + } }//Dblqh::initGciInLogFileRec() /* ========================================================================== @@ -18331,7 +18395,7 @@ void Dblqh::writeNextLog(Signal* signal) ndbrequire(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] < ZPAGE_SIZE); logPagePtr.p->logPageWord[logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]] 
= ZNEXT_MBYTE_TYPE; - if (logFilePtr.p->currentMbyte == (ZNO_MBYTES_IN_FILE - 1)) { + if (logFilePtr.p->currentMbyte == (clogFileSize - 1)) { jam(); /* -------------------------------------------------- */ /* CALCULATE THE NEW REMAINING WORDS WHEN */ @@ -18420,7 +18484,7 @@ void Dblqh::writeNextLog(Signal* signal) systemError(signal, __LINE__); }//if }//if - if (logFilePtr.p->currentMbyte == (ZNO_MBYTES_IN_FILE - 1)) { + if (logFilePtr.p->currentMbyte == (clogFileSize - 1)) { jam(); twnlNextMbyte = 0; if (logFilePtr.p->fileChangeState != LogFileRecord::NOT_ONGOING) { diff --git a/storage/ndb/src/kernel/blocks/dblqh/Makefile.am b/storage/ndb/src/kernel/blocks/dblqh/Makefile.am index c7c477a512c..b545096dc83 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/Makefile.am +++ b/storage/ndb/src/kernel/blocks/dblqh/Makefile.am @@ -16,7 +16,7 @@ EXTRA_PROGRAMS = ndbd_redo_log_reader ndbd_redo_log_reader_SOURCES = redoLogReader/records.cpp \ - redoLogReader/redoLogFileReader.cpp + redoLogReader/reader.cpp include $(top_srcdir)/storage/ndb/config/common.mk.am include $(top_srcdir)/storage/ndb/config/type_kernel.mk.am diff --git a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/redoLogFileReader.cpp b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/reader.cpp index e5df14aea9a..e5df14aea9a 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/redoLogFileReader.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/reader.cpp diff --git a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp index 5c0972148dd..f28687dca0d 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp +++ b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp @@ -2700,6 +2700,10 @@ private: ArrayPool<Page> c_page_pool; Uint32 cnoOfAllocatedPages; + Uint32 m_max_allocate_pages; + + /* read ahead in pages during disk order scan */ + Uint32 m_max_page_read_ahead; Tablerec *tablerec; Uint32 cnoOfTablerec; diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp index 1182ac4ee7d..8e532ae97b5 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp @@ -74,6 +74,10 @@ Dbtup::reportMemoryUsage(Signal* signal, int incDec){ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 6, JBB); } +#ifdef VM_TRACE +extern Uint32 fc_left, fc_right, fc_remove; +#endif + void Dbtup::execDUMP_STATE_ORD(Signal* signal) { @@ -155,12 +159,20 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal) return; }//if #endif -#if defined VM_TRACE && 0 - if (type == 1211){ - ndbout_c("Startar modul test av Page Manager"); +#if defined VM_TRACE + if (type == 1211 || type == 1212 || type == 1213){ + Uint32 seed = time(0); + if (signal->getLength() > 1) + seed = signal->theData[1]; + ndbout_c("Startar modul test av Page Manager (seed: 0x%x)", seed); + srand(seed); Vector<Chunk> chunks; const Uint32 LOOPS = 1000; + Uint32 sum_req = 0; + Uint32 sum_conf = 0; + Uint32 sum_loop = 0; + Uint32 max_loop = 0; for(Uint32 i = 0; i<LOOPS; i++){ // Case @@ -177,8 +189,15 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal) if(chunks.size() == 0 && c == 0){ c = 1 + rand() % 2; } + + if (type == 1211) + ndbout_c("loop=%d case=%d free=%d alloc=%d", i, c, free, alloc); - ndbout_c("loop=%d case=%d free=%d alloc=%d", i, c, free, alloc); + if (type == 1213) + { + c = 1; + alloc = 2 + (sum_conf >> 3) + (sum_conf >> 4); + } switch(c){ case 0:{ // Release const int ch = rand() % chunks.size(); @@ -190,23 +209,33 @@ 
Dbtup::execDUMP_STATE_ORD(Signal* signal) case 2: { // Seize(n) - fail alloc += free; // Fall through + sum_req += free; + goto doalloc; } case 1: { // Seize(n) (success) - + sum_req += alloc; + doalloc: Chunk chunk; allocConsPages(alloc, chunk.pageCount, chunk.pageId); ndbrequire(chunk.pageCount <= alloc); if(chunk.pageCount != 0){ chunks.push_back(chunk); if(chunk.pageCount != alloc) { - ndbout_c(" Tried to allocate %d - only allocated %d - free: %d", - alloc, chunk.pageCount, free); + if (type == 1211) + ndbout_c(" Tried to allocate %d - only allocated %d - free: %d", + alloc, chunk.pageCount, free); } } else { ndbout_c(" Failed to alloc %d pages with %d pages free", alloc, free); } + sum_conf += chunk.pageCount; + Uint32 tot = fc_left + fc_right + fc_remove; + sum_loop += tot; + if (tot > max_loop) + max_loop = tot; + for(Uint32 i = 0; i<chunk.pageCount; i++){ PagePtr pagePtr; pagePtr.i = chunk.pageId + i; @@ -225,6 +254,10 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal) returnCommonArea(chunk.pageId, chunk.pageCount); chunks.erase(chunks.size() - 1); } + + ndbout_c("Got %u%% of requested allocs, loops : %u 100*avg: %u max: %u", + (100 * sum_conf) / sum_req, sum_loop, 100*sum_loop / LOOPS, + max_loop); } #endif }//Dbtup::execDUMP_STATE_ORD() diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp index a9f0083a2b6..74c7d38bd64 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp @@ -305,6 +305,12 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) Uint32 noOfTriggers= 0; Uint32 tmp= 0; + + if (ndb_mgm_get_int_parameter(p, CFG_DB_MAX_ALLOCATE, &tmp)) + tmp = 32 * 1024 * 1024; + m_max_allocate_pages = (tmp + GLOBAL_PAGE_SIZE - 1) / GLOBAL_PAGE_SIZE; + + tmp = 0; ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_PAGE_RANGE, &tmp)); initPageRangeSize(tmp); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_TABLE, &cnoOfTablerec)); @@ -338,6 +344,18 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_BATCH_SIZE, &nScanBatch)); c_scanLockPool.setSize(nScanOp * nScanBatch); + + /* read ahead for disk scan can not be more that disk page buffer */ + { + Uint64 tmp = 64*1024*1024; + ndb_mgm_get_int64_parameter(p, CFG_DB_DISK_PAGE_BUFFER_MEMORY, &tmp); + m_max_page_read_ahead = (tmp + GLOBAL_PAGE_SIZE - 1) / GLOBAL_PAGE_SIZE; // in pages + // never read ahead more than 32 pages + if (m_max_page_read_ahead > 32) + m_max_page_read_ahead = 32; + } + + ScanOpPtr lcp; ndbrequire(c_scanOpPool.seize(lcp)); new (lcp.p) ScanOp(); diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp index d10fabf42da..24806062fcf 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp @@ -146,10 +146,17 @@ void Dbtup::initializePage() cnoOfAllocatedPages = tmp; // Is updated by returnCommonArea }//Dbtup::initializePage() +#ifdef VM_TRACE +Uint32 fc_left, fc_right, fc_remove; +#endif + void Dbtup::allocConsPages(Uint32 noOfPagesToAllocate, Uint32& noOfPagesAllocated, Uint32& allocPageRef) { +#ifdef VM_TRACE + fc_left = fc_right = fc_remove = 0; +#endif if (noOfPagesToAllocate == 0){ jam(); noOfPagesAllocated = 0; @@ -228,7 +235,10 @@ void Dbtup::findFreeLeftNeighbours(Uint32& allocPageRef, { PagePtr pageFirstPtr, pageLastPtr; Uint32 remainAllocate = noOfPagesToAllocate - noPagesAllocated; - while (allocPageRef > 0) { + Uint32 loop = 0; 
+ while (allocPageRef > 0 && + ++loop < 16) + { jam(); pageLastPtr.i = allocPageRef - 1; c_page_pool.getPtr(pageLastPtr); @@ -256,6 +266,9 @@ void Dbtup::findFreeLeftNeighbours(Uint32& allocPageRef, remainAllocate -= listSize; }//if }//if +#ifdef VM_TRACE + fc_left++; +#endif }//while }//Dbtup::findFreeLeftNeighbours() @@ -269,7 +282,10 @@ void Dbtup::findFreeRightNeighbours(Uint32& allocPageRef, jam(); return; }//if - while ((allocPageRef + noPagesAllocated) < c_page_pool.getSize()) { + Uint32 loop = 0; + while ((allocPageRef + noPagesAllocated) < c_page_pool.getSize() && + ++loop < 16) + { jam(); pageFirstPtr.i = allocPageRef + noPagesAllocated; c_page_pool.getPtr(pageFirstPtr); @@ -296,24 +312,37 @@ void Dbtup::findFreeRightNeighbours(Uint32& allocPageRef, remainAllocate -= listSize; }//if }//if +#ifdef VM_TRACE + fc_right++; +#endif }//while }//Dbtup::findFreeRightNeighbours() void Dbtup::insertCommonArea(Uint32 insPageRef, Uint32 insList) { cnoOfAllocatedPages -= (1 << insList); - PagePtr pageLastPtr, pageInsPtr; + PagePtr pageLastPtr, pageInsPtr, pageHeadPtr; + pageHeadPtr.i = cfreepageList[insList]; c_page_pool.getPtr(pageInsPtr, insPageRef); ndbrequire(insList < 16); pageLastPtr.i = (pageInsPtr.i + (1 << insList)) - 1; - pageInsPtr.p->next_cluster_page = cfreepageList[insList]; + pageInsPtr.p->page_state = ZFREE_COMMON; + pageInsPtr.p->next_cluster_page = pageHeadPtr.i; pageInsPtr.p->prev_cluster_page = RNIL; pageInsPtr.p->last_cluster_page = pageLastPtr.i; cfreepageList[insList] = pageInsPtr.i; + if (pageHeadPtr.i != RNIL) + { + jam(); + c_page_pool.getPtr(pageHeadPtr); + pageHeadPtr.p->prev_cluster_page = pageInsPtr.i; + } + c_page_pool.getPtr(pageLastPtr); + pageLastPtr.p->page_state = ZFREE_COMMON; pageLastPtr.p->first_cluster_page = pageInsPtr.i; pageLastPtr.p->next_page = RNIL; }//Dbtup::insertCommonArea() @@ -321,12 +350,13 @@ void Dbtup::insertCommonArea(Uint32 insPageRef, Uint32 insList) void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list) { cnoOfAllocatedPages += (1 << list); - PagePtr pagePrevPtr, pageNextPtr, pageLastPtr, pageSearchPtr, remPagePtr; + PagePtr pagePrevPtr, pageNextPtr, pageLastPtr, remPagePtr; c_page_pool.getPtr(remPagePtr, remPageRef); ndbrequire(list < 16); if (cfreepageList[list] == remPagePtr.i) { jam(); + ndbassert(remPagePtr.p->prev_cluster_page == RNIL); cfreepageList[list] = remPagePtr.p->next_cluster_page; pageNextPtr.i = cfreepageList[list]; if (pageNextPtr.i != RNIL) { @@ -335,30 +365,25 @@ void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list) pageNextPtr.p->prev_cluster_page = RNIL; }//if } else { - pageSearchPtr.i = cfreepageList[list]; - while (true) { - jam(); - c_page_pool.getPtr(pageSearchPtr); - pagePrevPtr = pageSearchPtr; - pageSearchPtr.i = pageSearchPtr.p->next_cluster_page; - if (pageSearchPtr.i == remPagePtr.i) { - jam(); - break; - }//if - }//while + pagePrevPtr.i = remPagePtr.p->prev_cluster_page; pageNextPtr.i = remPagePtr.p->next_cluster_page; + c_page_pool.getPtr(pagePrevPtr); pagePrevPtr.p->next_cluster_page = pageNextPtr.i; - if (pageNextPtr.i != RNIL) { + if (pageNextPtr.i != RNIL) + { jam(); c_page_pool.getPtr(pageNextPtr); pageNextPtr.p->prev_cluster_page = pagePrevPtr.i; - }//if + } }//if remPagePtr.p->next_cluster_page= RNIL; remPagePtr.p->last_cluster_page= RNIL; remPagePtr.p->prev_cluster_page= RNIL; + remPagePtr.p->page_state = ~ZFREE_COMMON; pageLastPtr.i = (remPagePtr.i + (1 << list)) - 1; c_page_pool.getPtr(pageLastPtr); pageLastPtr.p->first_cluster_page= RNIL; + pageLastPtr.p->page_state = 
~ZFREE_COMMON; + }//Dbtup::removeCommonArea() diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp index 6ef8d3585e9..cde63091cfb 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp @@ -432,6 +432,11 @@ void Dbtup::allocMoreFragPages(Fragrecord* const regFragPtr) // We will grow by 18.75% plus two more additional pages to grow // a little bit quicker in the beginning. /* -----------------------------------------------------------------*/ + + if (noAllocPages > m_max_allocate_pages) + { + noAllocPages = m_max_allocate_pages; + } Uint32 allocated = allocFragPages(regFragPtr, noAllocPages); regFragPtr->noOfPagesToGrow += allocated; }//Dbtup::allocMoreFragPages() diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp index 948cd77b437..6e53531e118 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp @@ -687,13 +687,74 @@ Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr) // move to next extent jam(); pos.m_extent_info_ptr_i = ext_ptr.i; - Extent_info* ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i); + ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i); key.m_file_no = ext->m_key.m_file_no; key.m_page_no = ext->m_first_page_no; } } key.m_page_idx = 0; pos.m_get = ScanPos::Get_page_dd; + /* + read ahead for scan in disk order + do read ahead every 8:th page + */ + if ((bits & ScanOp::SCAN_DD) && + (((key.m_page_no - ext->m_first_page_no) & 7) == 0)) + { + jam(); + // initialize PGMAN request + Page_cache_client::Request preq; + preq.m_page = pos.m_key; + preq.m_callback = TheNULLCallback; + + // set maximum read ahead + Uint32 read_ahead = m_max_page_read_ahead; + + while (true) + { + // prepare page read ahead in current extent + Uint32 page_no = preq.m_page.m_page_no; + Uint32 page_no_limit = page_no + read_ahead; + Uint32 limit = ext->m_first_page_no + alloc.m_extent_size; + if (page_no_limit > limit) + { + jam(); + // read ahead crosses extent, set limit for this extent + read_ahead = page_no_limit - limit; + page_no_limit = limit; + // and make sure we only read one extra extent next time around + if (read_ahead > alloc.m_extent_size) + read_ahead = alloc.m_extent_size; + } + else + { + jam(); + read_ahead = 0; // no more to read ahead after this + } + // do read ahead pages for this extent + while (page_no < page_no_limit) + { + // page request to PGMAN + jam(); + preq.m_page.m_page_no = page_no; + int flags = 0; + // ignore result + m_pgman.get_page(signal, preq, flags); + jamEntry(); + page_no++; + } + if (!read_ahead || !list.next(ext_ptr)) + { + // no more extents after this or read ahead done + jam(); + break; + } + // move to next extent and initialize PGMAN request accordingly + Extent_info* ext = c_extent_pool.getPtr(ext_ptr.i); + preq.m_page.m_file_no = ext->m_key.m_file_no; + preq.m_page.m_page_no = ext->m_first_page_no; + } + } // if ScanOp::SCAN_DD read ahead } /*FALLTHRU*/ case ScanPos::Get_page_dd: @@ -726,6 +787,7 @@ Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr) safe_cast(&Dbtup::disk_page_tup_scan_callback); int flags = 0; int res = m_pgman.get_page(signal, preq, flags); + jamEntry(); if (res == 0) { jam(); // request queued diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index adc6d1e3ed4..56ecc8ddc39 100644 --- 
a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -277,6 +277,14 @@ void Ndbcntr::execSTTOR(Signal* signal) break; case ZSTART_PHASE_1: jam(); + { + Uint32 db_watchdog_interval = 0; + const ndb_mgm_configuration_iterator * p = + m_ctx.m_config.getOwnConfigIterator(); + ndb_mgm_get_int_parameter(p, CFG_DB_WATCHDOG_INTERVAL, &db_watchdog_interval); + ndbrequire(db_watchdog_interval); + update_watch_dog_timer(db_watchdog_interval); + } startPhase1Lab(signal); break; case ZSTART_PHASE_2: @@ -1410,6 +1418,13 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) { jamEntry(); + if (ERROR_INSERTED(1001)) + { + sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100, + signal->getLength()); + return; + } + const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0]; NdbNodeBitmask allFailed; allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes); @@ -2734,16 +2749,34 @@ void Ndbcntr::execSTART_ORD(Signal* signal){ c_missra.execSTART_ORD(signal); } +#define CLEAR_DX 13 +#define CLEAR_LCP 3 + void -Ndbcntr::clearFilesystem(Signal* signal){ +Ndbcntr::clearFilesystem(Signal* signal) +{ + const Uint32 lcp = c_fsRemoveCount >= CLEAR_DX; + FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend(); req->userReference = reference(); req->userPointer = 0; req->directory = 1; req->ownDirectory = 1; - FsOpenReq::setVersion(req->fileNumber, 3); - FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); // Can by any... - FsOpenReq::v1_setDisk(req->fileNumber, c_fsRemoveCount); + + if (lcp == 0) + { + FsOpenReq::setVersion(req->fileNumber, 3); + FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); // Can by any... + FsOpenReq::v1_setDisk(req->fileNumber, c_fsRemoveCount); + } + else + { + FsOpenReq::setVersion(req->fileNumber, 5); + FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA); + FsOpenReq::v5_setLcpNo(req->fileNumber, c_fsRemoveCount - CLEAR_DX); + FsOpenReq::v5_setTableId(req->fileNumber, 0); + FsOpenReq::v5_setFragmentId(req->fileNumber, 0); + } sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal, FsRemoveReq::SignalLength, JBA); c_fsRemoveCount++; @@ -2752,12 +2785,12 @@ Ndbcntr::clearFilesystem(Signal* signal){ void Ndbcntr::execFSREMOVECONF(Signal* signal){ jamEntry(); - if(c_fsRemoveCount == 13){ + if(c_fsRemoveCount == CLEAR_DX + CLEAR_LCP){ jam(); sendSttorry(signal); } else { jam(); - ndbrequire(c_fsRemoveCount < 13); + ndbrequire(c_fsRemoveCount < CLEAR_DX + CLEAR_LCP); clearFilesystem(signal); }//if } diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp index 5f93ee31bc7..cf18bf34040 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp @@ -163,7 +163,12 @@ AsyncFile::run() theStartFlag = true; // Create write buffer for bigger writes theWriteBufferSize = WRITEBUFFERSIZE; - theWriteBuffer = (char *) ndbd_malloc(theWriteBufferSize); + theWriteBufferUnaligned = (char *) ndbd_malloc(theWriteBufferSize + + NDB_O_DIRECT_WRITE_ALIGNMENT-1); + theWriteBuffer = (char *) + (((UintPtr)theWriteBufferUnaligned + NDB_O_DIRECT_WRITE_ALIGNMENT - 1) & + ~(UintPtr)(NDB_O_DIRECT_WRITE_ALIGNMENT - 1)); + NdbMutex_Unlock(theStartMutexPtr); NdbCondition_Signal(theStartConditionPtr); @@ -247,6 +252,78 @@ AsyncFile::run() static char g_odirect_readbuf[2*GLOBAL_PAGE_SIZE -1]; #endif +int +AsyncFile::check_odirect_write(Uint32 flags, int& new_flags, int mode) +{ + assert(new_flags & (O_CREAT | O_TRUNC)); 
+#ifdef O_DIRECT + int ret; + char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1)); + while (((ret = ::write(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && + (errno == EINTR)); + if (ret == -1) + { + new_flags &= ~O_DIRECT; + ndbout_c("%s Failed to write using O_DIRECT, disabling", + theFileName.c_str()); + } + + close(theFd); + theFd = ::open(theFileName.c_str(), new_flags, mode); + if (theFd == -1) + return errno; +#endif + + return 0; +} + +int +AsyncFile::check_odirect_read(Uint32 flags, int &new_flags, int mode) +{ +#ifdef O_DIRECT + int ret; + char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1)); + while (((ret = ::read(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && + (errno == EINTR)); + if (ret == -1) + { + ndbout_c("%s Failed to read using O_DIRECT, disabling", + theFileName.c_str()); + goto reopen; + } + + if(lseek(theFd, 0, SEEK_SET) != 0) + { + return errno; + } + + if ((flags & FsOpenReq::OM_CHECK_SIZE) == 0) + { + struct stat buf; + if ((fstat(theFd, &buf) == -1)) + { + return errno; + } + else if ((buf.st_size % GLOBAL_PAGE_SIZE) != 0) + { + ndbout_c("%s filesize not a multiple of %d, disabling O_DIRECT", + theFileName.c_str(), GLOBAL_PAGE_SIZE); + goto reopen; + } + } + + return 0; + +reopen: + close(theFd); + new_flags &= ~O_DIRECT; + theFd = ::open(theFileName.c_str(), new_flags, mode); + if (theFd == -1) + return errno; +#endif + return 0; +} + void AsyncFile::openReq(Request* request) { m_auto_sync_freq = 0; @@ -312,7 +389,7 @@ void AsyncFile::openReq(Request* request) } #else Uint32 flags = request->par.open.flags; - Uint32 new_flags = 0; + int new_flags = 0; // Convert file open flags from Solaris to Liux if (flags & FsOpenReq::OM_CREATE) @@ -343,10 +420,6 @@ void AsyncFile::openReq(Request* request) { new_flags |= O_DIRECT; } -#elif defined O_SYNC - { - flags |= FsOpenReq::OM_SYNC; - } #endif if ((flags & FsOpenReq::OM_SYNC) && ! (flags & FsOpenReq::OM_INIT)) @@ -355,15 +428,19 @@ void AsyncFile::openReq(Request* request) new_flags |= O_SYNC; #endif } - + + const char * rw = ""; switch(flags & 0x3){ case FsOpenReq::OM_READONLY: + rw = "r"; new_flags |= O_RDONLY; break; case FsOpenReq::OM_WRITEONLY: + rw = "w"; new_flags |= O_WRONLY; break; case FsOpenReq::OM_READWRITE: + rw = "rw"; new_flags |= O_RDWR; break; default: @@ -404,11 +481,6 @@ no_odirect: if (new_flags & O_DIRECT) { new_flags &= ~O_DIRECT; - flags |= FsOpenReq::OM_SYNC; -#ifdef O_SYNC - if (! (flags & FsOpenReq::OM_INIT)) - new_flags |= O_SYNC; -#endif goto no_odirect; } #endif @@ -421,11 +493,6 @@ no_odirect: else if (new_flags & O_DIRECT) { new_flags &= ~O_DIRECT; - flags |= FsOpenReq::OM_SYNC; -#ifdef O_SYNC - if (! 
(flags & FsOpenReq::OM_INIT)) - new_flags |= O_SYNC; -#endif goto no_odirect; } #endif @@ -512,7 +579,6 @@ no_odirect: { ndbout_c("error on first write(%d), disable O_DIRECT", err); new_flags &= ~O_DIRECT; - flags |= FsOpenReq::OM_SYNC; close(theFd); theFd = ::open(theFileName.c_str(), new_flags, mode); if (theFd != -1) @@ -532,26 +598,32 @@ no_odirect: else if (flags & FsOpenReq::OM_DIRECT) { #ifdef O_DIRECT - do { - int ret; - char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1)); - while (((ret = ::read(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && (errno == EINTR)); - if (ret == -1) - { - ndbout_c("%s Failed to read using O_DIRECT, disabling", theFileName.c_str()); - flags |= FsOpenReq::OM_SYNC; - flags |= FsOpenReq::OM_INIT; - break; - } - if(lseek(theFd, 0, SEEK_SET) != 0) - { - request->error = errno; - return; - } - } while (0); + if (flags & (FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE)) + { + request->error = check_odirect_write(flags, new_flags, mode); + } + else + { + request->error = check_odirect_read(flags, new_flags, mode); + } + + if (request->error) + return; #endif } - +#ifdef VM_TRACE + if (flags & FsOpenReq::OM_DIRECT) + { +#ifdef O_DIRECT + ndbout_c("%s %s O_DIRECT: %d", + theFileName.c_str(), rw, + !!(new_flags & O_DIRECT)); +#else + ndbout_c("%s %s O_DIRECT: 0", + theFileName.c_str(), rw); +#endif + } +#endif if ((flags & FsOpenReq::OM_SYNC) && (flags & FsOpenReq::OM_INIT)) { #ifdef O_SYNC @@ -562,6 +634,10 @@ no_odirect: new_flags &= ~(O_CREAT | O_TRUNC); new_flags |= O_SYNC; theFd = ::open(theFileName.c_str(), new_flags, mode); + if (theFd == -1) + { + request->error = errno; + } #endif } #endif @@ -1079,7 +1155,8 @@ AsyncFile::rmrfReq(Request * request, char * path, bool removePath){ void AsyncFile::endReq() { // Thread is ended with return - if (theWriteBuffer) ndbd_free(theWriteBuffer, theWriteBufferSize); + if (theWriteBufferUnaligned) + ndbd_free(theWriteBufferUnaligned, theWriteBufferSize); } diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp index cc667225ce2..d8d585c47f7 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp @@ -234,9 +234,13 @@ private: bool theStartFlag; int theWriteBufferSize; char* theWriteBuffer; + void* theWriteBufferUnaligned; size_t m_write_wo_sync; // Writes wo/ sync size_t m_auto_sync_freq; // Auto sync freq in bytes + + int check_odirect_read(Uint32 flags, int&new_flags, int mode); + int check_odirect_write(Uint32 flags, int&new_flags, int mode); public: SimulatedBlock& m_fs; Ptr<GlobalPage> m_page_ptr; diff --git a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp index 44f8a8ab05b..26bf8878852 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp @@ -652,7 +652,7 @@ AsyncFile* Ndbfs::createAsyncFile(){ // Check limit of open files - if (m_maxFiles !=0 && theFiles.size()+1 == m_maxFiles) { + if (m_maxFiles !=0 && theFiles.size() == m_maxFiles) { // Print info about all open files for (unsigned i = 0; i < theFiles.size(); i++){ AsyncFile* file = theFiles[i]; diff --git a/storage/ndb/src/kernel/blocks/pgman.cpp b/storage/ndb/src/kernel/blocks/pgman.cpp index 57563d3c6d4..006673cf011 100644 --- a/storage/ndb/src/kernel/blocks/pgman.cpp +++ b/storage/ndb/src/kernel/blocks/pgman.cpp @@ -122,9 +122,9 @@ Pgman::execREAD_CONFIG_REQ(Signal* signal) if (page_buffer 
> 0) { - page_buffer /= GLOBAL_PAGE_SIZE; // in pages - m_page_entry_pool.setSize(100*page_buffer); + page_buffer = (page_buffer + GLOBAL_PAGE_SIZE - 1) / GLOBAL_PAGE_SIZE; // in pages m_param.m_max_pages = page_buffer; + m_page_entry_pool.setSize(m_param.m_lirs_stack_mult * page_buffer); m_param.m_max_hot_pages = (page_buffer * 9) / 10; } @@ -141,9 +141,10 @@ Pgman::execREAD_CONFIG_REQ(Signal* signal) Pgman::Param::Param() : m_max_pages(64), // smallish for testing + m_lirs_stack_mult(10), m_max_hot_pages(56), m_max_loop_count(256), - m_max_io_waits(64), + m_max_io_waits(256), m_stats_loop_delay(1000), m_cleanup_loop_delay(200), m_lcp_loop_delay(0) @@ -301,6 +302,9 @@ Pgman::get_sublist_no(Page_state state) { return Page_entry::SL_LOCKED; } + if (state == Page_entry::ONSTACK) { + return Page_entry::SL_IDLE; + } return Page_entry::SL_OTHER; } @@ -415,15 +419,55 @@ Pgman::get_page_entry(Ptr<Page_entry>& ptr, Uint32 file_no, Uint32 page_no) { if (find_page_entry(ptr, file_no, page_no)) { + jam(); ndbrequire(ptr.p->m_state != 0); m_stats.m_page_hits++; + +#ifdef VM_TRACE + debugOut << "PGMAN: get_page_entry: found" << endl; + debugOut << "PGMAN: " << ptr << endl; +#endif return true; } + if (m_page_entry_pool.getNoOfFree() == 0) + { + jam(); + Page_sublist& pl_idle = *m_page_sublist[Page_entry::SL_IDLE]; + Ptr<Page_entry> idle_ptr; + if (pl_idle.first(idle_ptr)) + { + jam(); + +#ifdef VM_TRACE + debugOut << "PGMAN: get_page_entry: re-use idle entry" << endl; + debugOut << "PGMAN: " << idle_ptr << endl; +#endif + + Page_state state = idle_ptr.p->m_state; + ndbrequire(state == Page_entry::ONSTACK); + + Page_stack& pl_stack = m_page_stack; + ndbrequire(pl_stack.hasPrev(idle_ptr)); + pl_stack.remove(idle_ptr); + state &= ~ Page_entry::ONSTACK; + set_page_state(idle_ptr, state); + ndbrequire(idle_ptr.p->m_state == 0); + + release_page_entry(idle_ptr); + } + } + if (seize_page_entry(ptr, file_no, page_no)) { + jam(); ndbrequire(ptr.p->m_state == 0); m_stats.m_page_faults++; + +#ifdef VM_TRACE + debugOut << "PGMAN: get_page_entry: seize" << endl; + debugOut << "PGMAN: " << ptr << endl; +#endif return true; } @@ -624,6 +668,7 @@ Pgman::lirs_reference(Ptr<Page_entry> ptr) jam(); move_cleanup_ptr(ptr); pl_queue.remove(ptr); + state &= ~ Page_entry::ONQUEUE; } if (state & Page_entry::BOUND) { @@ -654,6 +699,12 @@ Pgman::lirs_reference(Ptr<Page_entry> ptr) pl_stack.add(ptr); state |= Page_entry::ONSTACK; state |= Page_entry::HOT; + // it could be on queue already + if (state & Page_entry::ONQUEUE) { + jam(); + pl_queue.remove(ptr); + state &= ~Page_entry::ONQUEUE; + } } set_page_state(ptr, state); @@ -902,9 +953,11 @@ Pgman::process_map(Signal* signal) #ifdef VM_TRACE debugOut << "PGMAN: >process_map" << endl; #endif - int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits; - if (max_count > 0) + int max_count = 0; + if (m_param.m_max_io_waits > m_stats.m_current_io_waits) { + max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits; max_count = max_count / 2 + 1; + } Page_sublist& pl_map = *m_page_sublist[Page_entry::SL_MAP]; while (! 
pl_map.isEmpty() && --max_count >= 0) @@ -1056,15 +1109,10 @@ Pgman::process_cleanup(Signal* signal) } int max_loop_count = m_param.m_max_loop_count; - int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits; - - if (max_count > 0) - { + int max_count = 0; + if (m_param.m_max_io_waits > m_stats.m_current_io_waits) { + max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits; max_count = max_count / 2 + 1; - /* - * Possibly add code here to avoid writing too rapidly. May be - * unnecessary since only cold pages are cleaned. - */ } Ptr<Page_entry> ptr = m_cleanup_ptr; @@ -1166,9 +1214,12 @@ bool Pgman::process_lcp(Signal* signal) { Page_hashlist& pl_hash = m_page_hashlist; - int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits; - if (max_count > 0) + + int max_count = 0; + if (m_param.m_max_io_waits > m_stats.m_current_io_waits) { + max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits; max_count = max_count / 2 + 1; + } #ifdef VM_TRACE debugOut @@ -1927,6 +1978,8 @@ Pgman::verify_page_entry(Ptr<Page_entry> ptr) break; case Page_entry::SL_LOCKED: break; + case Page_entry::SL_IDLE: + break; case Page_entry::SL_OTHER: break; default: @@ -1973,8 +2026,11 @@ Pgman::verify_page_lists() ndbrequire(stack_count == pl_stack.count() || dump_page_lists()); ndbrequire(queue_count == pl_queue.count() || dump_page_lists()); + Uint32 hot_count = 0; Uint32 hot_bound_count = 0; Uint32 cold_bound_count = 0; + Uint32 stack_request_count = 0; + Uint32 queue_request_count = 0; Uint32 i1 = RNIL; for (pl_stack.first(ptr); ptr.i != RNIL; pl_stack.next(ptr)) @@ -1985,9 +2041,13 @@ Pgman::verify_page_lists() ndbrequire(state & Page_entry::ONSTACK || dump_page_lists()); if (! pl_stack.hasPrev(ptr)) ndbrequire(state & Page_entry::HOT || dump_page_lists()); - if (state & Page_entry::HOT && - state & Page_entry::BOUND) - hot_bound_count++; + if (state & Page_entry::HOT) { + hot_count++; + if (state & Page_entry::BOUND) + hot_bound_count++; + } + if (state & Page_entry::REQUEST) + stack_request_count++; } Uint32 i2 = RNIL; @@ -1999,6 +2059,8 @@ Pgman::verify_page_lists() ndbrequire(state & Page_entry::ONQUEUE || dump_page_lists()); ndbrequire(state & Page_entry::BOUND || dump_page_lists()); cold_bound_count++; + if (state & Page_entry::REQUEST) + queue_request_count++; } Uint32 tot_bound_count = @@ -2031,7 +2093,11 @@ Pgman::verify_page_lists() << " cache:" << m_stats.m_num_pages << "(" << locked_bound_count << "L)" << " stack:" << pl_stack.count() + << " hot:" << hot_count + << " hot_bound:" << hot_bound_count + << " stack_request:" << stack_request_count << " queue:" << pl_queue.count() + << " queue_request:" << queue_request_count << " queuewait:" << queuewait_count << endl; debugOut << "PGMAN:"; @@ -2139,6 +2205,8 @@ Pgman::get_sublist_name(Uint32 list_no) return "busy"; case Page_entry::SL_LOCKED: return "locked"; + case Page_entry::SL_IDLE: + return "idle"; case Page_entry::SL_OTHER: return "other"; } diff --git a/storage/ndb/src/kernel/blocks/pgman.hpp b/storage/ndb/src/kernel/blocks/pgman.hpp index 07029d1c3e5..e3bf0fa5780 100644 --- a/storage/ndb/src/kernel/blocks/pgman.hpp +++ b/storage/ndb/src/kernel/blocks/pgman.hpp @@ -325,8 +325,9 @@ private: ,SL_CALLBACK_IO = 4 ,SL_BUSY = 5 ,SL_LOCKED = 6 - ,SL_OTHER = 7 - ,SUBLIST_COUNT = 8 + ,SL_IDLE = 7 + ,SL_OTHER = 8 + ,SUBLIST_COUNT = 9 }; Uint16 m_file_no; // disk page address set at seize @@ -401,6 +402,7 @@ private: struct Param { Param(); Uint32 m_max_pages; // max number of cache pages + Uint32 m_lirs_stack_mult; // in 
m_max_pages (around 3-10) Uint32 m_max_hot_pages; // max hot cache pages (up to 99%) Uint32 m_max_loop_count; // limit purely local loops Uint32 m_max_io_waits; diff --git a/storage/ndb/src/kernel/blocks/restore.cpp b/storage/ndb/src/kernel/blocks/restore.cpp index d4a2414ef2f..2d40cd79daa 100644 --- a/storage/ndb/src/kernel/blocks/restore.cpp +++ b/storage/ndb/src/kernel/blocks/restore.cpp @@ -557,6 +557,9 @@ Restore::restore_next(Signal* signal, FilePtr file_ptr) case BackupFormat::GCP_ENTRY: parse_gcp_entry(signal, file_ptr, data, len); break; + case BackupFormat::EMPTY_ENTRY: + // skip + break; case 0x4e444242: // 'NDBB' if (check_file_version(signal, ntohl(* (data+2))) == 0) { diff --git a/storage/ndb/src/kernel/vm/Configuration.cpp b/storage/ndb/src/kernel/vm/Configuration.cpp index ebdd4c97aab..72770d35cde 100644 --- a/storage/ndb/src/kernel/vm/Configuration.cpp +++ b/storage/ndb/src/kernel/vm/Configuration.cpp @@ -443,6 +443,11 @@ Configuration::setupConfiguration(){ "TimeBetweenWatchDogCheck missing"); } + if(iter.get(CFG_DB_WATCHDOG_INTERVAL_INITIAL, &_timeBetweenWatchDogCheckInitial)){ + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", + "TimeBetweenWatchDogCheckInitial missing"); + } + /** * Get paths */ @@ -462,9 +467,12 @@ Configuration::setupConfiguration(){ * Create the watch dog thread */ { - Uint32 t = _timeBetweenWatchDogCheck; + if (_timeBetweenWatchDogCheckInitial < _timeBetweenWatchDogCheck) + _timeBetweenWatchDogCheckInitial = _timeBetweenWatchDogCheck; + + Uint32 t = _timeBetweenWatchDogCheckInitial; t = globalEmulatorData.theWatchDog ->setCheckInterval(t); - _timeBetweenWatchDogCheck = t; + _timeBetweenWatchDogCheckInitial = t; } ConfigValues* cf = ConfigValuesFactory::extractCurrentSection(iter.m_config); diff --git a/storage/ndb/src/kernel/vm/Configuration.hpp b/storage/ndb/src/kernel/vm/Configuration.hpp index 934261e40af..918a889a171 100644 --- a/storage/ndb/src/kernel/vm/Configuration.hpp +++ b/storage/ndb/src/kernel/vm/Configuration.hpp @@ -84,6 +84,7 @@ private: Uint32 _maxErrorLogs; Uint32 _lockPagesInMainMemory; Uint32 _timeBetweenWatchDogCheck; + Uint32 _timeBetweenWatchDogCheckInitial; ndb_mgm_configuration * m_ownConfig; ndb_mgm_configuration * m_clusterConfig; diff --git a/storage/ndb/src/kernel/vm/SimulatedBlock.cpp b/storage/ndb/src/kernel/vm/SimulatedBlock.cpp index 3125fc33258..bc16b9f364e 100644 --- a/storage/ndb/src/kernel/vm/SimulatedBlock.cpp +++ b/storage/ndb/src/kernel/vm/SimulatedBlock.cpp @@ -19,6 +19,7 @@ #include <NdbOut.hpp> #include <GlobalData.hpp> #include <Emulator.hpp> +#include <WatchDog.hpp> #include <ErrorHandlingMacros.hpp> #include <TimeQueue.hpp> #include <TransporterRegistry.hpp> @@ -38,6 +39,9 @@ #include <AttributeDescriptor.hpp> #include <NdbSqlUtil.hpp> +#include <EventLogger.hpp> +extern EventLogger g_eventLogger; + #define ljamEntry() jamEntryLine(30000 + __LINE__) #define ljam() jamLine(30000 + __LINE__) @@ -655,14 +659,20 @@ SimulatedBlock::getBatSize(Uint16 blockNo){ return sb->theBATSize; } +void* SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, Uint32 paramId) +{ + return allocRecordAligned(type, s, n, 0, 0, clear, paramId); +} + void* -SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, Uint32 paramId) +SimulatedBlock::allocRecordAligned(const char * type, size_t s, size_t n, void **unaligned_buffer, Uint32 align, bool clear, Uint32 paramId) { void * p = NULL; - size_t size = n*s; - Uint64 real_size = 
(Uint64)((Uint64)n)*((Uint64)s); - refresh_watch_dog(); + Uint32 over_alloc = unaligned_buffer ? (align - 1) : 0; + size_t size = n*s + over_alloc; + Uint64 real_size = (Uint64)((Uint64)n)*((Uint64)s) + over_alloc; + refresh_watch_dog(9); if (real_size > 0){ #ifdef VM_TRACE_MEM ndbout_c("%s::allocRecord(%s, %u, %u) = %llu bytes", @@ -696,14 +706,24 @@ SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, U char * ptr = (char*)p; const Uint32 chunk = 128 * 1024; while(size > chunk){ - refresh_watch_dog(); + refresh_watch_dog(9); memset(ptr, 0, chunk); ptr += chunk; size -= chunk; } - refresh_watch_dog(); + refresh_watch_dog(9); memset(ptr, 0, size); } + if (unaligned_buffer) + { + *unaligned_buffer = p; + p = (void *)(((UintPtr)p + over_alloc) & ~(UintPtr)(over_alloc)); +#ifdef VM_TRACE + g_eventLogger.info("'%s' (%u) %llu %llu, alignment correction %u bytes", + type, align, (Uint64)p, (Uint64)p+n*s, + (Uint32)((UintPtr)p - (UintPtr)*unaligned_buffer)); +#endif + } } return p; } @@ -720,9 +740,16 @@ SimulatedBlock::deallocRecord(void ** ptr, } void -SimulatedBlock::refresh_watch_dog() +SimulatedBlock::refresh_watch_dog(Uint32 place) +{ + globalData.incrementWatchDogCounter(place); +} + +void +SimulatedBlock::update_watch_dog_timer(Uint32 interval) { - globalData.incrementWatchDogCounter(1); + extern EmulatorData globalEmulatorData; + globalEmulatorData.theWatchDog->setCheckInterval(interval); } void @@ -1631,6 +1658,11 @@ SimulatedBlock::sendFragmentedSignal(NodeReceiverGroup rg, } SimulatedBlock::Callback SimulatedBlock::TheEmptyCallback = {0, 0}; +void +SimulatedBlock::TheNULLCallbackFunction(class Signal*, Uint32, Uint32) +{ abort(); /* should never be called */ } +SimulatedBlock::Callback SimulatedBlock::TheNULLCallback = +{ &SimulatedBlock::TheNULLCallbackFunction, 0 }; void SimulatedBlock::sendFragmentedSignal(BlockReference ref, diff --git a/storage/ndb/src/kernel/vm/SimulatedBlock.hpp b/storage/ndb/src/kernel/vm/SimulatedBlock.hpp index 37a8dde5956..a78ee21fb8f 100644 --- a/storage/ndb/src/kernel/vm/SimulatedBlock.hpp +++ b/storage/ndb/src/kernel/vm/SimulatedBlock.hpp @@ -131,6 +131,8 @@ public: virtual const char* get_filename(Uint32 fd) const { return "";} protected: static Callback TheEmptyCallback; + void TheNULLCallbackFunction(class Signal*, Uint32, Uint32); + static Callback TheNULLCallback; void execute(Signal* signal, Callback & c, Uint32 returnCode); @@ -334,7 +336,8 @@ protected: * Refresh Watch Dog in initialising code * */ - void refresh_watch_dog(); + void refresh_watch_dog(Uint32 place = 1); + void update_watch_dog_timer(Uint32 interval); /** * Prog error @@ -377,6 +380,7 @@ protected: * */ void* allocRecord(const char * type, size_t s, size_t n, bool clear = true, Uint32 paramId = 0); + void* allocRecordAligned(const char * type, size_t s, size_t n, void **unaligned_buffer, Uint32 align = NDB_O_DIRECT_WRITE_ALIGNMENT, bool clear = true, Uint32 paramId = 0); /** * Deallocate record @@ -597,6 +601,8 @@ inline void SimulatedBlock::execute(Signal* signal, Callback & c, Uint32 returnCode){ CallbackFunction fun = c.m_callbackFunction; + if (fun == TheNULLCallback.m_callbackFunction) + return; ndbrequire(fun != 0); c.m_callbackFunction = NULL; (this->*fun)(signal, c.m_callbackData, returnCode); diff --git a/storage/ndb/src/kernel/vm/WatchDog.cpp b/storage/ndb/src/kernel/vm/WatchDog.cpp index d1abb709b1e..a7f5e8f5c2b 100644 --- a/storage/ndb/src/kernel/vm/WatchDog.cpp +++ b/storage/ndb/src/kernel/vm/WatchDog.cpp @@ -16,6 +16,7 @@ #include 
<ndb_global.h> #include <my_pthread.h> +#include <sys/times.h> #include "WatchDog.hpp" #include "GlobalData.hpp" @@ -24,6 +25,8 @@ #include <ErrorHandlingMacros.hpp> #include <EventLogger.hpp> +#include <NdbTick.h> + extern EventLogger g_eventLogger; extern "C" @@ -71,66 +74,115 @@ WatchDog::doStop(){ } } +const char *get_action(Uint32 IPValue) +{ + const char *action; + switch (IPValue) { + case 1: + action = "Job Handling"; + break; + case 2: + action = "Scanning Timers"; + break; + case 3: + action = "External I/O"; + break; + case 4: + action = "Print Job Buffers at crash"; + break; + case 5: + action = "Checking connections"; + break; + case 6: + action = "Performing Send"; + break; + case 7: + action = "Polling for Receive"; + break; + case 8: + action = "Performing Receive"; + break; + case 9: + action = "Allocating memory"; + break; + default: + action = "Unknown place"; + break; + }//switch + return action; +} + void -WatchDog::run(){ - unsigned int anIPValue; - unsigned int alerts = 0; +WatchDog::run() +{ + unsigned int anIPValue, sleep_time; unsigned int oldIPValue = 0; - + unsigned int theIntervalCheck = theInterval; + struct MicroSecondTimer start_time, last_time, now; + NdbTick_getMicroTimer(&start_time); + last_time = start_time; + // WatchDog for the single threaded NDB - while(!theStop){ - Uint32 tmp = theInterval / 500; - tmp= (tmp ? tmp : 1); - - while(!theStop && tmp > 0){ - NdbSleep_MilliSleep(500); - tmp--; - } - + while (!theStop) + { + sleep_time= 100; + + NdbSleep_MilliSleep(sleep_time); if(theStop) break; + NdbTick_getMicroTimer(&now); + if (NdbTick_getMicrosPassed(last_time, now)/1000 > sleep_time*2) + { + struct tms my_tms; + times(&my_tms); + g_eventLogger.info("Watchdog: User time: %llu System time: %llu", + (Uint64)my_tms.tms_utime, + (Uint64)my_tms.tms_stime); + g_eventLogger.warning("Watchdog: Warning overslept %u ms, expected %u ms.", + NdbTick_getMicrosPassed(last_time, now)/1000, + sleep_time); + } + last_time = now; + // Verify that the IP thread is not stuck in a loop anIPValue = *theIPValue; - if(anIPValue != 0) { + if (anIPValue != 0) + { oldIPValue = anIPValue; globalData.incrementWatchDogCounter(0); - alerts = 0; - } else { - const char *last_stuck_action; - alerts++; - switch (oldIPValue) { - case 1: - last_stuck_action = "Job Handling"; - break; - case 2: - last_stuck_action = "Scanning Timers"; - break; - case 3: - last_stuck_action = "External I/O"; - break; - case 4: - last_stuck_action = "Print Job Buffers at crash"; - break; - case 5: - last_stuck_action = "Checking connections"; - break; - case 6: - last_stuck_action = "Performing Send"; - break; - case 7: - last_stuck_action = "Polling for Receive"; - break; - case 8: - last_stuck_action = "Performing Receive"; - break; - default: - last_stuck_action = "Unknown place"; - break; - }//switch - g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action); - if(alerts == 3){ - shutdownSystem(last_stuck_action); + NdbTick_getMicroTimer(&start_time); + theIntervalCheck = theInterval; + } + else + { + int warn = 1; + Uint32 elapsed = NdbTick_getMicrosPassed(start_time, now)/1000; + /* + oldIPValue == 9 indicates malloc going on, this can take some time + so only warn if we pass the watchdog interval + */ + if (oldIPValue == 9) + if (elapsed < theIntervalCheck) + warn = 0; + else + theIntervalCheck += theInterval; + + if (warn) + { + const char *last_stuck_action = get_action(oldIPValue); + g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action); + { + struct tms my_tms; + 
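The warning and shutdown policy in the reworked WatchDog::run() can be restated as two small predicates; this is a rough standalone sketch with illustrative names, not the patch itself: a stuck kernel thread is reported on every 100 ms check, except that place 9 (memory allocation) is given a full watchdog interval before the first report, and the node is only shut down once it has been stuck for more than three intervals.

/* Sketch only, not part of the patch. */
typedef unsigned int Uint32;   // stand-in for NDB's Uint32

static bool should_warn(Uint32 place, Uint32 elapsed_ms, Uint32 interval_ms,
                        Uint32& next_malloc_check_ms)
{
  if (place == 9 && elapsed_ms < next_malloc_check_ms)
    return false;                        // allocation may legitimately be slow
  if (place == 9)
    next_malloc_check_ms += interval_ms; // postpone the next allocation warning
  return true;
}

static bool should_shutdown(Uint32 elapsed_ms, Uint32 interval_ms)
{
  // only evaluated after should_warn() returned true, as in WatchDog::run()
  return elapsed_ms > 3 * interval_ms;
}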
times(&my_tms); + g_eventLogger.info("Watchdog: User time: %llu System time: %llu", + (Uint64)my_tms.tms_utime, + (Uint64)my_tms.tms_stime); + } + if (elapsed > 3 * theInterval) + { + shutdownSystem(last_stuck_action); + } } } } diff --git a/storage/ndb/src/mgmapi/mgmapi.cpp b/storage/ndb/src/mgmapi/mgmapi.cpp index e7dc1d1d503..5f975da8c73 100644 --- a/storage/ndb/src/mgmapi/mgmapi.cpp +++ b/storage/ndb/src/mgmapi/mgmapi.cpp @@ -524,7 +524,7 @@ ndb_mgm_connect(NdbMgmHandle handle, int no_retries, NDB_SOCKET_TYPE sockfd= NDB_INVALID_SOCKET; Uint32 i; SocketClient s(0, 0); - s.set_connect_timeout(handle->timeout); + s.set_connect_timeout((handle->timeout+999)/1000); if (!s.init()) { fprintf(handle->errstream, diff --git a/storage/ndb/src/mgmclient/CommandInterpreter.cpp b/storage/ndb/src/mgmclient/CommandInterpreter.cpp index 93fc3d46e43..00ea882a49d 100644 --- a/storage/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/storage/ndb/src/mgmclient/CommandInterpreter.cpp @@ -18,6 +18,7 @@ #include <Vector.hpp> #include <mgmapi.h> #include <util/BaseString.hpp> +#include <ndbd_exit_codes.h> class MgmtSrvr; @@ -704,6 +705,133 @@ CommandInterpreter::printError() } } +/* + * print log event from mgmsrv to console screen + */ +#define make_uint64(a,b) (((Uint64)(a)) + (((Uint64)(b)) << 32)) +#define Q64(a) make_uint64(event->EVENT.a ## _lo, event->EVENT.a ## _hi) +#define R event->source_nodeid +#define Q(a) event->EVENT.a +#define QVERSION getMajor(Q(version)), getMinor(Q(version)), getBuild(Q(version)) +#define NDB_LE_(a) NDB_LE_ ## a +static void +printLogEvent(struct ndb_logevent* event) +{ + switch (event->type) { + /** + * NDB_MGM_EVENT_CATEGORY_BACKUP + */ +#undef EVENT +#define EVENT BackupStarted + case NDB_LE_BackupStarted: + ndbout_c("Node %u: Backup %d started from node %d", + R, Q(backup_id), Q(starting_node)); + break; +#undef EVENT +#define EVENT BackupFailedToStart + case NDB_LE_BackupFailedToStart: + ndbout_c("Node %u: Backup request from %d failed to start. Error: %d", + R, Q(starting_node), Q(error)); + break; +#undef EVENT +#define EVENT BackupCompleted + case NDB_LE_BackupCompleted: + ndbout_c("Node %u: Backup %u started from node %u completed\n" + " StartGCP: %u StopGCP: %u\n" + " #Records: %u #LogRecords: %u\n" + " Data: %u bytes Log: %u bytes", R, + Q(backup_id), Q(starting_node), + Q(start_gci), Q(stop_gci), + Q(n_records), Q(n_log_records), + Q(n_bytes), Q(n_log_bytes)); + break; +#undef EVENT +#define EVENT BackupAborted + case NDB_LE_BackupAborted: + ndbout_c("Node %u: Backup %d started from %d has been aborted. Error: %d", + R, Q(backup_id), Q(starting_node), Q(error)); + break; + /** + * NDB_MGM_EVENT_CATEGORY_STARTUP + */ +#undef EVENT +#define EVENT NDBStartStarted + case NDB_LE_NDBStartStarted: + ndbout_c("Node %u: Start initiated (version %d.%d.%d)", + R, QVERSION); + break; +#undef EVENT +#define EVENT NDBStartCompleted + case NDB_LE_NDBStartCompleted: + ndbout_c("Node %u: Started (version %d.%d.%d)", + R, QVERSION); + break; +#undef EVENT +#define EVENT NDBStopStarted + case NDB_LE_NDBStopStarted: + ndbout_c("Node %u: %s shutdown initiated", R, + (Q(stoptype) == 1 ? 
"Cluster" : "Node")); + break; +#undef EVENT +#define EVENT NDBStopCompleted + case NDB_LE_NDBStopCompleted: + { + BaseString action_str(""); + BaseString signum_str(""); + getRestartAction(Q(action), action_str); + if (Q(signum)) + signum_str.appfmt(" Initiated by signal %d.", + Q(signum)); + ndbout_c("Node %u: Node shutdown completed%s.%s", + R, action_str.c_str(), signum_str.c_str()); + } + break; +#undef EVENT +#define EVENT NDBStopForced + case NDB_LE_NDBStopForced: + { + BaseString action_str(""); + BaseString reason_str(""); + BaseString sphase_str(""); + int signum = Q(signum); + int error = Q(error); + int sphase = Q(sphase); + int extra = Q(extra); + getRestartAction(Q(action), action_str); + if (signum) + reason_str.appfmt(" Initiated by signal %d.", signum); + if (error) + { + ndbd_exit_classification cl; + ndbd_exit_status st; + const char *msg = ndbd_exit_message(error, &cl); + const char *cl_msg = ndbd_exit_classification_message(cl, &st); + const char *st_msg = ndbd_exit_status_message(st); + reason_str.appfmt(" Caused by error %d: \'%s(%s). %s\'.", + error, msg, cl_msg, st_msg); + if (extra != 0) + reason_str.appfmt(" (extra info %d)", extra); + } + if (sphase < 255) + sphase_str.appfmt(" Occured during startphase %u.", sphase); + ndbout_c("Node %u: Forced node shutdown completed%s.%s%s", + R, action_str.c_str(), sphase_str.c_str(), + reason_str.c_str()); + } + break; +#undef EVENT +#define EVENT StopAborted + case NDB_LE_NDBStopAborted: + ndbout_c("Node %u: Node shutdown aborted", R); + break; + /** + * default nothing to print + */ + default: + break; + } +} + //***************************************************************************** //***************************************************************************** @@ -720,30 +848,21 @@ event_thread_run(void* p) int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 1, NDB_MGM_EVENT_CATEGORY_STARTUP, 0 }; - int fd = ndb_mgm_listen_event(handle, filter); - if (fd != NDB_INVALID_SOCKET) + + NdbLogEventHandle log_handle= NULL; + struct ndb_logevent log_event; + + log_handle= ndb_mgm_create_logevent_handle(handle, filter); + if (log_handle) { do_event_thread= 1; - char *tmp= 0; - char buf[1024]; do { - SocketInputStream in(fd,2000); - if((tmp = in.gets(buf, sizeof(buf)))) - { - const char ping_token[]= "<PING>"; - if (memcmp(ping_token,tmp,sizeof(ping_token)-1)) - if(tmp && strlen(tmp)) - { - Guard g(printmutex); - ndbout << tmp; - } - } - else if(in.timedout() && ndb_mgm_check_connection(handle)<0) - { - break; - } + if (ndb_logevent_get_next(log_handle, &log_event, 2000) <= 0) + continue; + Guard g(printmutex); + printLogEvent(&log_event); } while(do_event_thread); - NDB_CLOSE_SOCKET(fd); + ndb_mgm_destroy_logevent_handle(&log_handle); } else { @@ -1008,6 +1127,7 @@ CommandInterpreter::execute_impl(const char *_line, bool interactive) } else if(strcasecmp(firstToken, "ENTER") == 0 && allAfterFirstToken != NULL && + allAfterFirstToken != NULL && strncasecmp(allAfterFirstToken, "SINGLE USER MODE ", sizeof("SINGLE USER MODE") - 1) == 0){ m_error = executeEnterSingleUser(allAfterFirstToken); @@ -2476,8 +2596,7 @@ CommandInterpreter::executeStartBackup(char* parameters, bool interactive) { struct ndb_mgm_reply reply; unsigned int backupId; - int fd = -1; - + Vector<BaseString> args; { BaseString(parameters).split(args); @@ -2494,8 +2613,6 @@ CommandInterpreter::executeStartBackup(char* parameters, bool interactive) if (sz == 2 && args[1] == "NOWAIT") { flags = 0; - result = ndb_mgm_start_backup(m_mgmsrv, 0, &backupId, &reply); - 
goto END_BACKUP; } else if (sz == 1 || (sz == 3 && args[1] == "WAIT" && args[2] == "COMPLETED")) { @@ -2513,62 +2630,74 @@ CommandInterpreter::executeStartBackup(char* parameters, bool interactive) return -1; } - /** - * If interactive...event listner is already running - */ + NdbLogEventHandle log_handle= NULL; + struct ndb_logevent log_event; if (flags == 2 && !interactive) { int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0, 0 }; - fd = ndb_mgm_listen_event(m_mgmsrv, filter); - if (fd < 0) + log_handle = ndb_mgm_create_logevent_handle(m_mgmsrv, filter); + if (!log_handle) { ndbout << "Initializing start of backup failed" << endl; printError(); - return fd; + return -1; } } result = ndb_mgm_start_backup(m_mgmsrv, flags, &backupId, &reply); -END_BACKUP: if (result != 0) { ndbout << "Backup failed" << endl; printError(); - if (fd >= 0) - close(fd); + if (log_handle) + ndb_mgm_destroy_logevent_handle(&log_handle); return result; } - if (fd >= 0) + /** + * If interactive, event listner thread is already running + */ + if (log_handle && !interactive) { - char *tmp; - char buf[1024]; - { - SocketInputStream in(fd); - int count = 0; - do { - tmp = in.gets(buf, 1024); - if(tmp) - { - ndbout << tmp; - unsigned int id; - if(sscanf(tmp, "%*[^:]: Backup %d ", &id) == 1 && id == backupId){ - count++; - } - } - } while(count < 2); - } - - SocketInputStream in(fd, 10); + int count = 0; + int retry = 0; do { - tmp = in.gets(buf, 1024); - if(tmp && tmp[0] != 0) + if (ndb_logevent_get_next(log_handle, &log_event, 60000) > 0) + { + int print = 0; + switch (log_event.type) { + case NDB_LE_BackupStarted: + if (log_event.BackupStarted.backup_id == backupId) + print = 1; + break; + case NDB_LE_BackupCompleted: + if (log_event.BackupCompleted.backup_id == backupId) + print = 1; + break; + case NDB_LE_BackupAborted: + if (log_event.BackupAborted.backup_id == backupId) + print = 1; + break; + default: + break; + } + if (print) + { + Guard g(m_print_mutex); + printLogEvent(&log_event); + count++; + } + } + else { - ndbout << tmp; + retry++; } - } while(tmp && tmp[0] != 0); - - close(fd); + } while(count < 2 && retry < 3); + + if (retry >= 3) + ndbout << "get backup event failed for " << retry << " times" << endl; + + ndb_mgm_destroy_logevent_handle(&log_handle); } return 0; diff --git a/storage/ndb/src/mgmclient/Makefile.am b/storage/ndb/src/mgmclient/Makefile.am index 5b2009240c3..8e05354919b 100644 --- a/storage/ndb/src/mgmclient/Makefile.am +++ b/storage/ndb/src/mgmclient/Makefile.am @@ -21,7 +21,8 @@ libndbmgmclient_la_LIBADD = ../mgmapi/libmgmapi.la \ ../common/logger/liblogger.la \ ../common/portlib/libportlib.la \ ../common/util/libgeneral.la \ - ../common/portlib/libportlib.la + ../common/portlib/libportlib.la \ + ../common/debugger/libtrace.la ndb_mgm_SOURCES = main.cpp diff --git a/storage/ndb/src/mgmclient/main.cpp b/storage/ndb/src/mgmclient/main.cpp index d0c117f20d3..fbd81c71700 100644 --- a/storage/ndb/src/mgmclient/main.cpp +++ b/storage/ndb/src/mgmclient/main.cpp @@ -23,6 +23,8 @@ extern "C" { #elif !defined(__NETWARE__) #include <readline/readline.h> extern "C" int add_history(const char *command); /* From readline directory */ +extern "C" int read_history(const char *command); +extern "C" int write_history(const char *command); #define HAVE_READLINE #endif } @@ -155,10 +157,35 @@ int main(int argc, char** argv){ signal(SIGPIPE, handler); com = new Ndb_mgmclient(opt_connect_str,1); int ret= 0; + BaseString histfile; if (!opt_execute_str) { +#ifdef HAVE_READLINE + char *histfile_env= 
getenv("NDB_MGM_HISTFILE"); + if (histfile_env) + histfile.assign(histfile_env,strlen(histfile_env)); + else if(getenv("HOME")) + { + histfile.assign(getenv("HOME"),strlen(getenv("HOME"))); + histfile.append("/.ndb_mgm_history"); + } + if (histfile.length()) + read_history(histfile.c_str()); +#endif + ndbout << "-- NDB Cluster -- Management Client --" << endl; while(read_and_execute(_try_reconnect)); + +#ifdef HAVE_READLINE + if (histfile.length()) + { + BaseString histfile_tmp; + histfile_tmp.assign(histfile); + histfile_tmp.append(".TMP"); + if(!write_history(histfile_tmp.c_str())) + my_rename(histfile_tmp.c_str(), histfile.c_str(), MYF(MY_WME)); + } +#endif } else { diff --git a/storage/ndb/src/mgmsrv/ConfigInfo.cpp b/storage/ndb/src/mgmsrv/ConfigInfo.cpp index e27e55d2a13..69907224b7f 100644 --- a/storage/ndb/src/mgmsrv/ConfigInfo.cpp +++ b/storage/ndb/src/mgmsrv/ConfigInfo.cpp @@ -580,6 +580,18 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = { STR_VALUE(MAX_INT_RNIL) }, { + CFG_DB_WATCHDOG_INTERVAL_INITIAL, + "TimeBetweenWatchDogCheckInitial", + DB_TOKEN, + "Time between execution checks inside a database node in the early start phases when memory is allocated", + ConfigInfo::CI_USED, + true, + ConfigInfo::CI_INT, + "6000", + "70", + STR_VALUE(MAX_INT_RNIL) }, + + { CFG_DB_STOP_ON_ERROR, "StopOnError", DB_TOKEN, @@ -880,6 +892,18 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = { STR_VALUE(MAX_INT_RNIL) }, { + CFG_DB_REDOLOG_FILE_SIZE, + "FragmentLogFileSize", + DB_TOKEN, + "Size of each Redo log file", + ConfigInfo::CI_USED, + false, + ConfigInfo::CI_INT, + "16M", + "4M", + "1G" }, + + { CFG_DB_MAX_OPEN_FILES, "MaxNoOfOpenFiles", DB_TOKEN, @@ -1298,6 +1322,18 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = { STR_VALUE(MAX_INT_RNIL) }, { + CFG_DB_MAX_ALLOCATE, + "MaxAllocate", + DB_TOKEN, + "Maximum size of allocation to use when allocating memory for tables", + ConfigInfo::CI_USED, + false, + ConfigInfo::CI_INT, + "32M", + "1M", + "1G" }, + + { CFG_DB_MEMREPORT_FREQUENCY, "MemReportFrequency", DB_TOKEN, @@ -1309,6 +1345,18 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = { "0", STR_VALUE(MAX_INT_RNIL) }, + { + CFG_DB_O_DIRECT, + "ODirect", + DB_TOKEN, + "Use O_DIRECT file write/read when possible", + ConfigInfo::CI_USED, + true, + ConfigInfo::CI_BOOL, + "false", + "false", + "true"}, + /*************************************************************************** * API ***************************************************************************/ diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp index f84c79b704f..af708664a69 100644 --- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -18,6 +18,7 @@ #include "MgmtSrvr.hpp" #include "MgmtErrorReporter.hpp" +#include "ndb_mgmd_error.h" #include <ConfigRetriever.hpp> #include <NdbOut.hpp> @@ -239,13 +240,6 @@ MgmtSrvr::stopEventLog() // Nothing yet } -class ErrorItem -{ -public: - int _errorCode; - const char * _errorText; -}; - bool MgmtSrvr::setEventLogFilter(int severity, int enable) { @@ -268,62 +262,6 @@ MgmtSrvr::isEventLogFilterEnabled(int severity) return g_eventLogger.isEnable((Logger::LoggerLevel)severity); } -static ErrorItem errorTable[] = -{ - {MgmtSrvr::NO_CONTACT_WITH_PROCESS, "No contact with the process (dead ?)."}, - {MgmtSrvr::PROCESS_NOT_CONFIGURED, "The process is not configured."}, - {MgmtSrvr::WRONG_PROCESS_TYPE, - "The process has wrong type. 
Expected a DB process."}, - {MgmtSrvr::COULD_NOT_ALLOCATE_MEMORY, "Could not allocate memory."}, - {MgmtSrvr::SEND_OR_RECEIVE_FAILED, "Send to process or receive failed."}, - {MgmtSrvr::INVALID_LEVEL, "Invalid level. Should be between 1 and 30."}, - {MgmtSrvr::INVALID_ERROR_NUMBER, "Invalid error number. Should be >= 0."}, - {MgmtSrvr::INVALID_TRACE_NUMBER, "Invalid trace number."}, - {MgmtSrvr::NOT_IMPLEMENTED, "Not implemented."}, - {MgmtSrvr::INVALID_BLOCK_NAME, "Invalid block name"}, - - {MgmtSrvr::CONFIG_PARAM_NOT_EXIST, - "The configuration parameter does not exist for the process type."}, - {MgmtSrvr::CONFIG_PARAM_NOT_UPDATEABLE, - "The configuration parameter is not possible to update."}, - {MgmtSrvr::VALUE_WRONG_FORMAT_INT_EXPECTED, - "Incorrect value. Expected integer."}, - {MgmtSrvr::VALUE_TOO_LOW, "Value is too low."}, - {MgmtSrvr::VALUE_TOO_HIGH, "Value is too high."}, - {MgmtSrvr::VALUE_WRONG_FORMAT_BOOL_EXPECTED, - "Incorrect value. Expected TRUE or FALSE."}, - - {MgmtSrvr::CONFIG_FILE_OPEN_WRITE_ERROR, - "Could not open configuration file for writing."}, - {MgmtSrvr::CONFIG_FILE_OPEN_READ_ERROR, - "Could not open configuration file for reading."}, - {MgmtSrvr::CONFIG_FILE_WRITE_ERROR, - "Write error when writing configuration file."}, - {MgmtSrvr::CONFIG_FILE_READ_ERROR, - "Read error when reading configuration file."}, - {MgmtSrvr::CONFIG_FILE_CLOSE_ERROR, "Could not close configuration file."}, - - {MgmtSrvr::CONFIG_CHANGE_REFUSED_BY_RECEIVER, - "The change was refused by the receiving process."}, - {MgmtSrvr::COULD_NOT_SYNC_CONFIG_CHANGE_AGAINST_PHYSICAL_MEDIUM, - "The change could not be synced against physical medium."}, - {MgmtSrvr::CONFIG_FILE_CHECKSUM_ERROR, - "The config file is corrupt. Checksum error."}, - {MgmtSrvr::NOT_POSSIBLE_TO_SEND_CONFIG_UPDATE_TO_PROCESS_TYPE, - "It is not possible to send an update of a configuration variable " - "to this kind of process."}, - {MgmtSrvr::NODE_SHUTDOWN_IN_PROGESS, "Node shutdown in progress" }, - {MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" }, - {MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH, - "Node shutdown would cause system crash" }, - {MgmtSrvr::UNSUPPORTED_NODE_SHUTDOWN, - "Unsupported multi node shutdown. Abort option required." }, - {MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." 
}, - {MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP, - "Operation not allowed while nodes are starting or stopping."}, - {MgmtSrvr::NO_CONTACT_WITH_DB_NODES, "No contact with database nodes" } -}; - int MgmtSrvr::translateStopRef(Uint32 errCode) { switch(errCode){ @@ -343,8 +281,6 @@ int MgmtSrvr::translateStopRef(Uint32 errCode) return 4999; } -static int noOfErrorCodes = sizeof(errorTable) / sizeof(ErrorItem); - int MgmtSrvr::getNodeCount(enum ndb_mgm_node_type type) const { @@ -1969,18 +1905,8 @@ MgmtSrvr::dumpState(int nodeId, const Uint32 args[], Uint32 no) const char* MgmtSrvr::getErrorText(int errorCode, char *buf, int buf_sz) { - - for (int i = 0; i < noOfErrorCodes; ++i) { - if (errorCode == errorTable[i]._errorCode) { - BaseString::snprintf(buf, buf_sz, errorTable[i]._errorText); - buf[buf_sz-1]= 0; - return buf; - } - } - ndb_error_string(errorCode, buf, buf_sz); buf[buf_sz-1]= 0; - return buf; } diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp index a54b7866091..90287554ef8 100644 --- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -148,45 +148,6 @@ public: */ bool isEventLogFilterEnabled(int severity); - STATIC_CONST( NO_CONTACT_WITH_PROCESS = 5000 ); - STATIC_CONST( PROCESS_NOT_CONFIGURED = 5001 ); - STATIC_CONST( WRONG_PROCESS_TYPE = 5002 ); - STATIC_CONST( COULD_NOT_ALLOCATE_MEMORY = 5003 ); - STATIC_CONST( SEND_OR_RECEIVE_FAILED = 5005 ); - STATIC_CONST( INVALID_LEVEL = 5006 ); - STATIC_CONST( INVALID_ERROR_NUMBER = 5007 ); - STATIC_CONST( INVALID_TRACE_NUMBER = 5008 ); - STATIC_CONST( NOT_IMPLEMENTED = 5009 ); - STATIC_CONST( INVALID_BLOCK_NAME = 5010 ); - - STATIC_CONST( CONFIG_PARAM_NOT_EXIST = 5011 ); - STATIC_CONST( CONFIG_PARAM_NOT_UPDATEABLE = 5012 ); - STATIC_CONST( VALUE_WRONG_FORMAT_INT_EXPECTED = 5013 ); - STATIC_CONST( VALUE_TOO_LOW = 5014 ); - STATIC_CONST( VALUE_TOO_HIGH = 5015 ); - STATIC_CONST( VALUE_WRONG_FORMAT_BOOL_EXPECTED = 5016 ); - - STATIC_CONST( CONFIG_FILE_OPEN_WRITE_ERROR = 5017 ); - STATIC_CONST( CONFIG_FILE_OPEN_READ_ERROR = 5018 ); - STATIC_CONST( CONFIG_FILE_WRITE_ERROR = 5019 ); - STATIC_CONST( CONFIG_FILE_READ_ERROR = 5020 ); - STATIC_CONST( CONFIG_FILE_CLOSE_ERROR = 5021 ); - - STATIC_CONST( CONFIG_CHANGE_REFUSED_BY_RECEIVER = 5022 ); - STATIC_CONST( COULD_NOT_SYNC_CONFIG_CHANGE_AGAINST_PHYSICAL_MEDIUM = 5023 ); - STATIC_CONST( CONFIG_FILE_CHECKSUM_ERROR = 5024 ); - STATIC_CONST( NOT_POSSIBLE_TO_SEND_CONFIG_UPDATE_TO_PROCESS_TYPE = 5025 ); - - STATIC_CONST( NODE_SHUTDOWN_IN_PROGESS = 5026 ); - STATIC_CONST( SYSTEM_SHUTDOWN_IN_PROGRESS = 5027 ); - STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 ); - - STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 ); - STATIC_CONST( UNSUPPORTED_NODE_SHUTDOWN = 5031 ); - - STATIC_CONST( NODE_NOT_API_NODE = 5062 ); - STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 ); - /** * This enum specifies the different signal loggig modes possible to set * with the setSignalLoggingMode method. diff --git a/storage/ndb/src/mgmsrv/ndb_mgmd_error.h b/storage/ndb/src/mgmsrv/ndb_mgmd_error.h new file mode 100644 index 00000000000..2438f15c808 --- /dev/null +++ b/storage/ndb/src/mgmsrv/ndb_mgmd_error.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef NDB_MGMD_ERROR_H +#define NDB_MGMD_ERROR_H + +#define NO_CONTACT_WITH_PROCESS 5000 +#define WRONG_PROCESS_TYPE 5002 +#define SEND_OR_RECEIVE_FAILED 5005 +#define INVALID_ERROR_NUMBER 5007 +#define INVALID_TRACE_NUMBER 5008 +#define INVALID_BLOCK_NAME 5010 +#define NODE_SHUTDOWN_IN_PROGESS 5026 +#define SYSTEM_SHUTDOWN_IN_PROGRESS 5027 +#define NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH 5028 +#define NO_CONTACT_WITH_DB_NODES 5030 +#define UNSUPPORTED_NODE_SHUTDOWN 5031 +#define NODE_NOT_API_NODE 5062 +#define OPERATION_NOT_ALLOWED_START_STOP 5063 + +#endif diff --git a/storage/ndb/src/ndbapi/Ndb.cpp b/storage/ndb/src/ndbapi/Ndb.cpp index 78b7af5522b..9b8e4e86d30 100644 --- a/storage/ndb/src/ndbapi/Ndb.cpp +++ b/storage/ndb/src/ndbapi/Ndb.cpp @@ -754,17 +754,27 @@ Ndb::getNodeId() } /**************************************************************************** -Uint64 getTupleIdFromNdb( Uint32 aTableId, Uint32 cacheSize ); - -Parameters: aTableId : The TableId. - cacheSize: Prefetch this many values -Remark: Returns a new TupleId to the application. - The TupleId comes from SYSTAB_0 where SYSKEY_0 = TableId. - It is initialized to (TableId << 48) + 1 in NdbcntrMain.cpp. +Uint64 getAutoIncrementValue( const char* aTableName, + Uint64 & autoValue, + Uint32 cacheSize, + Uint64 step, + Uint64 start); + +Parameters: aTableName (IN) : The table name. + autoValue (OUT) : Returns new autoincrement value + cacheSize (IN) : Prefetch this many values + step (IN) : Specifies the step between the + autoincrement values. + start (IN) : Start value for first value +Remark: Returns a new autoincrement value to the application. + The autoincrement values can be increased by steps + (default 1) and a number of values can be prefetched + by specifying cacheSize (default 10). 
****************************************************************************/ int Ndb::getAutoIncrementValue(const char* aTableName, - Uint64 & tupleId, Uint32 cacheSize) + Uint64 & autoValue, Uint32 cacheSize, + Uint64 step, Uint64 start) { DBUG_ENTER("Ndb::getAutoIncrementValue"); ASSERT_NOT_MYSQLD; @@ -778,15 +788,16 @@ Ndb::getAutoIncrementValue(const char* aTableName, } const NdbTableImpl* table = info->m_table_impl; TupleIdRange & range = info->m_tuple_id_range; - if (getTupleIdFromNdb(table, range, tupleId, cacheSize) == -1) + if (getTupleIdFromNdb(table, range, autoValue, cacheSize, step, start) == -1) DBUG_RETURN(-1); - DBUG_PRINT("info", ("value %lu", (ulong) tupleId)); + DBUG_PRINT("info", ("value %lu", (ulong) autoValue)); DBUG_RETURN(0); } int Ndb::getAutoIncrementValue(const NdbDictionary::Table * aTable, - Uint64 & tupleId, Uint32 cacheSize) + Uint64 & autoValue, Uint32 cacheSize, + Uint64 step, Uint64 start) { DBUG_ENTER("Ndb::getAutoIncrementValue"); ASSERT_NOT_MYSQLD; @@ -801,51 +812,86 @@ Ndb::getAutoIncrementValue(const NdbDictionary::Table * aTable, DBUG_RETURN(-1); } TupleIdRange & range = info->m_tuple_id_range; - if (getTupleIdFromNdb(table, range, tupleId, cacheSize) == -1) + if (getTupleIdFromNdb(table, range, autoValue, cacheSize, step, start) == -1) DBUG_RETURN(-1); - DBUG_PRINT("info", ("value %lu", (ulong)tupleId)); + DBUG_PRINT("info", ("value %lu", (ulong)autoValue)); DBUG_RETURN(0); } int Ndb::getAutoIncrementValue(const NdbDictionary::Table * aTable, - TupleIdRange & range, Uint64 & tupleId, - Uint32 cacheSize) + TupleIdRange & range, Uint64 & autoValue, + Uint32 cacheSize, Uint64 step, Uint64 start) { DBUG_ENTER("Ndb::getAutoIncrementValue"); assert(aTable != 0); const NdbTableImpl* table = & NdbTableImpl::getImpl(*aTable); - if (getTupleIdFromNdb(table, range, tupleId, cacheSize) == -1) + if (getTupleIdFromNdb(table, range, autoValue, cacheSize, step, start) == -1) DBUG_RETURN(-1); - DBUG_PRINT("info", ("value %lu", (ulong)tupleId)); + DBUG_PRINT("info", ("value %lu", (ulong)autoValue)); DBUG_RETURN(0); } int Ndb::getTupleIdFromNdb(const NdbTableImpl* table, - TupleIdRange & range, Uint64 & tupleId, Uint32 cacheSize) + TupleIdRange & range, Uint64 & tupleId, + Uint32 cacheSize, Uint64 step, Uint64 start) { +/* + Returns a new TupleId to the application. + The TupleId comes from SYSTAB_0 where SYSKEY_0 = TableId. + It is initialized to (TableId << 48) + 1 in NdbcntrMain.cpp. + In most cases step= start= 1, in which case we get: + 1,2,3,4,5,... + If step=10 and start=5 and first number is 1, we get: + 5,15,25,35,... +*/ DBUG_ENTER("Ndb::getTupleIdFromNdb"); - if (range.m_first_tuple_id != range.m_last_tuple_id) + /* + Check if the next value can be taken from the pre-fetched + sequence. + */ + if (range.m_first_tuple_id != range.m_last_tuple_id && + range.m_first_tuple_id + step <= range.m_last_tuple_id) { assert(range.m_first_tuple_id < range.m_last_tuple_id); - tupleId = ++range.m_first_tuple_id; - DBUG_PRINT("info", ("next cached value %lu", (ulong)tupleId)); + range.m_first_tuple_id += step; + tupleId = range.m_first_tuple_id; + DBUG_PRINT("info", ("Next cached value %lu", (ulong) tupleId)); } else { + /* + If start value is greater than step it is ignored + */ + Uint64 offset = (start > step) ? 1 : start; + + /* + Pre-fetch a number of values depending on cacheSize + */ if (cacheSize == 0) cacheSize = 1; + DBUG_PRINT("info", ("reading %u values from database", (uint)cacheSize)); /* * reserve next cacheSize entries in db. 
adds cacheSize to NEXTID - * and returns first tupleId in the new range. + * and returns first tupleId in the new range. If tupleId's are + * incremented in steps then multiply the cacheSize with step size. */ - Uint64 opValue = cacheSize; + Uint64 opValue = cacheSize * step; + if (opTupleIdOnNdb(table, range, opValue, 0) == -1) DBUG_RETURN(-1); - tupleId = opValue; + DBUG_PRINT("info", ("Next value fetched from database %lu", (ulong) opValue)); + DBUG_PRINT("info", ("Increasing %lu by offset %lu, increment is %lu", (ulong) (ulong) opValue, (ulong) offset, (ulong) step)); + Uint64 current, next; + Uint64 div = ((Uint64) (opValue + step - offset)) / step; + next = div * step + offset; + current = (next < step) ? next : next - step; + tupleId = (opValue <= current) ? current : next; + DBUG_PRINT("info", ("Returning %lu", (ulong) tupleId)); + range.m_first_tuple_id = tupleId; } DBUG_RETURN(0); } diff --git a/storage/ndb/src/ndbapi/NdbBlob.cpp b/storage/ndb/src/ndbapi/NdbBlob.cpp index e651a4c70b0..a0244bde95b 100644 --- a/storage/ndb/src/ndbapi/NdbBlob.cpp +++ b/storage/ndb/src/ndbapi/NdbBlob.cpp @@ -1262,6 +1262,7 @@ NdbBlob::deletePartsUnknown(Uint32 part) DBUG_RETURN(-1); } tOp->m_abortOption= NdbOperation::AO_IgnoreError; + tOp->m_noErrorPropagation = true; n++; } DBUG_PRINT("info", ("bat=%u", bat)); @@ -1598,6 +1599,7 @@ NdbBlob::preExecute(NdbTransaction::ExecType anExecType, bool& batch) } if (isWriteOp()) { tOp->m_abortOption = NdbOperation::AO_IgnoreError; + tOp->m_noErrorPropagation = true; } theHeadInlineReadOp = tOp; // execute immediately @@ -1644,6 +1646,7 @@ NdbBlob::preExecute(NdbTransaction::ExecType anExecType, bool& batch) } if (isWriteOp()) { tOp->m_abortOption = NdbOperation::AO_IgnoreError; + tOp->m_noErrorPropagation = true; } theHeadInlineReadOp = tOp; // execute immediately diff --git a/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp b/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp index 00acfe62ad9..a82983fca8c 100644 --- a/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp +++ b/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp @@ -41,6 +41,7 @@ #include <NdbEventOperation.hpp> #include "NdbEventOperationImpl.hpp" #include <signaldata/AlterTable.hpp> +#include "ndb_internal.hpp" #include <EventLogger.hpp> extern EventLogger g_eventLogger; @@ -2838,7 +2839,7 @@ send_report: data[5]= apply_gci >> 32; data[6]= latest_gci & ~(Uint32)0; data[7]= latest_gci >> 32; - m_ndb->theImpl->send_event_report(data,8); + Ndb_internal::send_event_report(m_ndb, data,8); #ifdef VM_TRACE assert(m_total_alloc >= m_free_data_sz); #endif diff --git a/storage/ndb/src/ndbapi/NdbOperation.cpp b/storage/ndb/src/ndbapi/NdbOperation.cpp index 903372ddb9d..50531292e40 100644 --- a/storage/ndb/src/ndbapi/NdbOperation.cpp +++ b/storage/ndb/src/ndbapi/NdbOperation.cpp @@ -76,7 +76,8 @@ NdbOperation::NdbOperation(Ndb* aNdb, NdbOperation::Type aType) : m_keyInfoGSN(GSN_KEYINFO), m_attrInfoGSN(GSN_ATTRINFO), theBlobList(NULL), - m_abortOption(-1) + m_abortOption(-1), + m_noErrorPropagation(false) { theReceiver.init(NdbReceiver::NDB_OPERATION, this); theError.code = 0; @@ -101,7 +102,8 @@ NdbOperation::setErrorCode(int anErrorCode) theError.code = anErrorCode; theNdbCon->theErrorLine = theErrorLine; theNdbCon->theErrorOperation = this; - theNdbCon->setOperationErrorCode(anErrorCode); + if (!(m_abortOption == AO_IgnoreError && m_noErrorPropagation)) + theNdbCon->setOperationErrorCode(anErrorCode); } /****************************************************************************** @@ -116,6 +118,7 @@ 
NdbOperation::setErrorCodeAbort(int anErrorCode) theError.code = anErrorCode; theNdbCon->theErrorLine = theErrorLine; theNdbCon->theErrorOperation = this; + // ignore m_noErrorPropagation theNdbCon->setOperationErrorCodeAbort(anErrorCode); } @@ -161,6 +164,7 @@ NdbOperation::init(const NdbTableImpl* tab, NdbTransaction* myConnection){ theMagicNumber = 0xABCDEF01; theBlobList = NULL; m_abortOption = -1; + m_noErrorPropagation = false; m_no_disk_flag = 1; tSignal = theNdb->getSignal(); diff --git a/storage/ndb/src/ndbapi/ndb_internal.hpp b/storage/ndb/src/ndbapi/ndb_internal.hpp new file mode 100644 index 00000000000..f5f37f95a04 --- /dev/null +++ b/storage/ndb/src/ndbapi/ndb_internal.hpp @@ -0,0 +1,26 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "NdbImpl.hpp" + +class Ndb_internal +{ +private: + friend class NdbEventBuffer; + Ndb_internal() {} + virtual ~Ndb_internal() {} + static int send_event_report(Ndb *ndb, Uint32 *data, Uint32 length) + { return ndb->theImpl->send_event_report(data, length); } +}; diff --git a/storage/ndb/src/ndbapi/ndberror.c b/storage/ndb/src/ndbapi/ndberror.c index 8c40ddf2116..914acd17c08 100644 --- a/storage/ndb/src/ndbapi/ndberror.c +++ b/storage/ndb/src/ndbapi/ndberror.c @@ -19,6 +19,9 @@ #include <ndberror.h> #include <m_string.h> +#include "../mgmsrv/ndb_mgmd_error.h" + + typedef struct ErrorBundle { int code; int mysql_code; @@ -179,7 +182,7 @@ ErrorBundle ErrorCodes[] = { { 873, DMEC, TR, "Out of attrinfo records for scan in tuple manager" }, { 899, DMEC, TR, "Rowid already allocated" }, { 1217, DMEC, TR, "Out of operation records in local data manager (increase MaxNoOfLocalOperations)" }, - { 1220, DMEC, TR, "REDO log files overloaded, consult online manual (decrease TimeBetweenLocalCheckpoints, and|or increase NoOfFragmentLogFiles)" }, + { 1220, DMEC, TR, "REDO log files overloaded, consult online manual (increase FragmentLogFileSize)" }, { 1222, DMEC, TR, "Out of transaction markers in LQH" }, { 4021, DMEC, TR, "Out of Send Buffer space in NDB API" }, { 4022, DMEC, TR, "Out of Send Buffer space in NDB API" }, @@ -619,6 +622,33 @@ ErrorBundle ErrorCodes[] = { { 4273, DMEC, IE, "No blob table in dict cache" }, { 4274, DMEC, IE, "Corrupted main table PK in blob operation" }, { 4275, DMEC, AE, "The blob method is incompatible with operation type or lock mode" }, + + { NO_CONTACT_WITH_PROCESS, DMEC, AE, + "No contact with the process (dead ?)."}, + { WRONG_PROCESS_TYPE, DMEC, AE, + "The process has wrong type. Expected a DB process."}, + { SEND_OR_RECEIVE_FAILED, DMEC, AE, + "Send to process or receive failed."}, + { INVALID_ERROR_NUMBER, DMEC, AE, + "Invalid error number. 
Should be >= 0."}, + { INVALID_TRACE_NUMBER, DMEC, AE, + "Invalid trace number."}, + { INVALID_BLOCK_NAME, DMEC, AE, + "Invalid block name"}, + { NODE_SHUTDOWN_IN_PROGESS, DMEC, AE, + "Node shutdown in progress" }, + { SYSTEM_SHUTDOWN_IN_PROGRESS, DMEC, AE, + "System shutdown in progress" }, + { NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH, DMEC, AE, + "Node shutdown would cause system crash" }, + { UNSUPPORTED_NODE_SHUTDOWN, DMEC, AE, + "Unsupported multi node shutdown. Abort option required." }, + { NODE_NOT_API_NODE, DMEC, AE, + "The specified node is not an API node." }, + { OPERATION_NOT_ALLOWED_START_STOP, DMEC, AE, + "Operation not allowed while nodes are starting or stopping."}, + { NO_CONTACT_WITH_DB_NODES, DMEC, AE, + "No contact with database nodes" } }; static |
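The stepped auto-increment arithmetic added to Ndb::getTupleIdFromNdb() further up (offset, div, next, current) is easiest to follow with concrete numbers. The sketch below reproduces just that rounding as a standalone function under illustrative names and checks the example from the source comment: step 10, start 5 and a first reserved id of 1 yields 5, 15, 25, and so on.

/* Sketch only, not part of the patch: map the first value of a freshly
 * reserved id range onto the step/start grid, as getTupleIdFromNdb() does. */
#include <cassert>

typedef unsigned long long Uint64;   // stand-in for NDB's Uint64

static Uint64 first_on_grid(Uint64 reserved_first, Uint64 step, Uint64 start)
{
  Uint64 offset = (start > step) ? 1 : start;   // a start beyond step is ignored
  Uint64 next = ((reserved_first + step - offset) / step) * step + offset;
  Uint64 current = (next < step) ? next : next - step;
  return (reserved_first <= current) ? current : next;
}

int main()
{
  assert(first_on_grid(1, 10, 5) == 5);    // comment's example: 5, 15, 25, ...
  assert(first_on_grid(7, 10, 5) == 15);   // 7 has already passed 5, jump to 15
  assert(first_on_grid(1, 1, 1) == 1);     // default step/start: 1, 2, 3, ...
  return 0;
}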