diff options
Diffstat (limited to 'ndb/src/kernel')
104 files changed, 8370 insertions, 9821 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index d6dfcfe8587..c8c9e82efc2 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -4,13 +4,13 @@ Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4014 Next DBLQH 5043 -Next DBDICT 6006 -Next DBDIH 7174 +Next DBDICT 6007 +Next DBDIH 7177 Next DBTC 8038 Next CMVMI 9000 Next BACKUP 10022 Next DBUTIL 11002 -Next DBTUX 12007 +Next DBTUX 12008 Next SUMA 13001 TESTING NODE FAILURE, ARBITRATION @@ -205,6 +205,8 @@ Delay execution of ABORTREQ signal 2 seconds to generate time-out. 8048: Make TC not choose own node for simple/dirty read 5041: Crash is receiving simple read from other TC on different node +8050: Send TCKEYREF is operation is non local + 5100,5101: Drop ABORT req in primary replica Crash on "next" ABORT @@ -311,6 +313,10 @@ Test Crashes in handling node restarts 7170: Crash when receiving START_PERMREF (InitialStartRequired) +7174: Crash starting node before sending DICT_LOCK_REQ +7175: Master sends one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR) +7176: Slave NR pretends master does not support DICT lock (rolling upgrade) + DICT: 6000 Crash during NR when receiving DICTSTARTREQ 6001 Crash during NR when receiving SCHEMA_INFO @@ -430,6 +436,7 @@ Drop Table/Index: 8034: Fail next index create in TC 8035: Fail next trigger drop in TC 8036: Fail next index drop in TC +6006: Crash participant in create index 4013: verify TUP tab descr before and after next DROP TABLE @@ -463,6 +470,7 @@ Test routing of signals: Ordered index: -------------- +12007: Make next alloc node fail with no memory error Dbdict: ------- diff --git a/ndb/src/kernel/blocks/backup/Backup.cpp b/ndb/src/kernel/blocks/backup/Backup.cpp index b23b434fcd9..43c1de5e2b3 100644 --- a/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/ndb/src/kernel/blocks/backup/Backup.cpp @@ -72,6 +72,106 @@ static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE; #define 
SEND_BACKUP_STARTED_FLAG(A) (((A) & 0x3) > 0) #define SEND_BACKUP_COMPLETED_FLAG(A) (((A) & 0x3) > 1) +void +Backup::execREAD_CONFIG_REQ(Signal* signal) +{ + jamEntry(); + + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + c_nodePool.setSize(MAX_NDB_NODES); + + Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0; + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &m_diskless)); + ndb_mgm_get_int_parameter(p, CFG_DB_PARALLEL_BACKUPS, &noBackups); + // ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES, &noTables)); + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DICT_TABLE, &noTables)); + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_ATTRIBUTES, &noAttribs)); + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT, &noFrags)); + + noAttribs++; //RT 527 bug fix + + c_backupPool.setSize(noBackups); + c_backupFilePool.setSize(3 * noBackups); + c_tablePool.setSize(noBackups * noTables); + c_attributePool.setSize(noBackups * noAttribs); + c_triggerPool.setSize(noBackups * 3 * noTables); + c_fragmentPool.setSize(noBackups * noFrags); + + Uint32 szMem = 0; + ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_MEM, &szMem); + Uint32 noPages = (szMem + sizeof(Page32) - 1) / sizeof(Page32); + // We need to allocate an additional of 2 pages. 1 page because of a bug in + // ArrayPool and another one for DICTTAINFO. 
+ c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2); + + Uint32 szDataBuf = (2 * 1024 * 1024); + Uint32 szLogBuf = (2 * 1024 * 1024); + Uint32 szWrite = 32768, maxWriteSize = (256 * 1024); + ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_DATA_BUFFER_MEM, &szDataBuf); + ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_LOG_BUFFER_MEM, &szLogBuf); + ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_WRITE_SIZE, &szWrite); + ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_MAX_WRITE_SIZE, &maxWriteSize); + + c_defaults.m_logBufferSize = szLogBuf; + c_defaults.m_dataBufferSize = szDataBuf; + c_defaults.m_minWriteSize = szWrite; + c_defaults.m_maxWriteSize = maxWriteSize; + + { // Init all tables + ArrayList<Table> tables(c_tablePool); + TablePtr ptr; + while(tables.seize(ptr)){ + new (ptr.p) Table(c_attributePool, c_fragmentPool); + } + tables.release(); + } + + { + ArrayList<BackupFile> ops(c_backupFilePool); + BackupFilePtr ptr; + while(ops.seize(ptr)){ + new (ptr.p) BackupFile(* this, c_pagePool); + } + ops.release(); + } + + { + ArrayList<BackupRecord> recs(c_backupPool); + BackupRecordPtr ptr; + while(recs.seize(ptr)){ + new (ptr.p) BackupRecord(* this, c_pagePool, c_tablePool, + c_backupFilePool, c_triggerPool); + } + recs.release(); + } + + // Initialize BAT for interface to file system + { + Page32Ptr p; + ndbrequire(c_pagePool.seizeId(p, 0)); + c_startOfPages = (Uint32 *)p.p; + c_pagePool.release(p); + + NewVARIABLE* bat = allocateBat(1); + bat[0].WA = c_startOfPages; + bat[0].nrr = c_pagePool.getSize()*sizeof(Page32)/sizeof(Uint32); + } + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); +} + void Backup::execSTTOR(Signal* signal) { @@ -166,6 +266,65 @@ Backup::execCONTINUEB(Signal* signal) const Uint32 Tdata2 = signal->theData[2]; switch(Tdata0) { + case BackupContinueB::BACKUP_FRAGMENT_INFO: + { + const 
Uint32 ptr_I = Tdata1; + Uint32 tabPtr_I = Tdata2; + Uint32 fragPtr_I = signal->theData[3]; + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, ptr_I); + TablePtr tabPtr; + ptr.p->tables.getPtr(tabPtr, tabPtr_I); + FragmentPtr fragPtr; + tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I); + + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); + + const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2; + Uint32 * dst; + if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz)) + { + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 4); + return; + } + + BackupFormat::CtlFile::FragmentInfo * fragInfo = + (BackupFormat::CtlFile::FragmentInfo*)dst; + fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO); + fragInfo->SectionLength = htonl(sz); + fragInfo->TableId = htonl(fragPtr.p->tableId); + fragInfo->FragmentNo = htonl(fragPtr_I); + fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF); + fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32); + fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF); + fragInfo->FilePosHigh = htonl(0 >> 32); + + filePtr.p->operation.dataBuffer.updateWritePtr(sz); + + fragPtr_I++; + if (fragPtr_I == tabPtr.p->fragments.getSize()) + { + signal->theData[0] = tabPtr.p->tableId; + signal->theData[1] = 0; // unlock + EXECUTE_DIRECT(DBDICT, GSN_BACKUP_FRAGMENT_REQ, signal, 2); + + fragPtr_I = 0; + ptr.p->tables.next(tabPtr); + if ((tabPtr_I = tabPtr.i) == RNIL) + { + closeFiles(signal, ptr); + return; + } + } + signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO; + signal->theData[1] = ptr_I; + signal->theData[2] = tabPtr_I; + signal->theData[3] = fragPtr_I; + sendSignal(BACKUP_REF, GSN_CONTINUEB, signal, 4, JBB); + return; + } case BackupContinueB::START_FILE_THREAD: case BackupContinueB::BUFFER_UNDERFLOW: { @@ -355,7 +514,7 @@ Backup::findTable(const BackupRecordPtr & ptr, return false; } -static Uint32 xps(Uint32 x, Uint64 ms) +static Uint32 xps(Uint64 x, Uint64 ms) { 
float fx = x; float fs = ms; @@ -369,9 +528,9 @@ static Uint32 xps(Uint32 x, Uint64 ms) } struct Number { - Number(Uint32 r) { val = r;} - Number & operator=(Uint32 r) { val = r; return * this; } - Uint32 val; + Number(Uint64 r) { val = r;} + Number & operator=(Uint64 r) { val = r; return * this; } + Uint64 val; }; NdbOut & @@ -445,8 +604,10 @@ Backup::execBACKUP_COMPLETE_REP(Signal* signal) startTime = NdbTick_CurrentMillisecond() - startTime; ndbout_c("Backup %d has completed", rep->backupId); - const Uint32 bytes = rep->noOfBytes; - const Uint32 records = rep->noOfRecords; + const Uint64 bytes = + rep->noOfBytesLow + (((Uint64)rep->noOfBytesHigh) << 32); + const Uint64 records = + rep->noOfRecordsLow + (((Uint64)rep->noOfRecordsHigh) << 32); Number rps = xps(records, startTime); Number bps = xps(bytes, startTime); @@ -959,7 +1120,7 @@ Backup::sendBackupRef(BlockReference senderRef, Uint32 flags, Signal *signal, } if(errorCode != BackupRef::IAmNotMaster){ - signal->theData[0] = EventReport::BackupFailedToStart; + signal->theData[0] = NDB_LE_BackupFailedToStart; signal->theData[1] = senderRef; signal->theData[2] = errorCode; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); @@ -1218,7 +1379,7 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) BackupConf::SignalLength, JBB); } - signal->theData[0] = EventReport::BackupStarted; + signal->theData[0] = NDB_LE_BackupStarted; signal->theData[1] = ptr.p->clientRef; signal->theData[2] = ptr.p->backupId; ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+3); @@ -1805,8 +1966,10 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) const Uint32 tableId = conf->tableId; const Uint32 fragmentNo = conf->fragmentNo; const Uint32 nodeId = refToNode(signal->senderBlockRef()); - const Uint32 noOfBytes = conf->noOfBytes; - const Uint32 noOfRecords = conf->noOfRecords; + const Uint64 noOfBytes = + conf->noOfBytesLow + (((Uint64)conf->noOfBytesHigh) << 32); + const Uint64 noOfRecords = + 
conf->noOfRecordsLow + (((Uint64)conf->noOfRecordsHigh) << 32); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); @@ -1818,9 +1981,13 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); + tabPtr.p->noOfRecords += noOfRecords; + FragmentPtr fragPtr; tabPtr.p->fragments.getPtr(fragPtr, fragmentNo); + fragPtr.p->noOfRecords = noOfRecords; + ndbrequire(fragPtr.p->scanned == 0); ndbrequire(fragPtr.p->scanning == 1); ndbrequire(fragPtr.p->node == nodeId); @@ -1844,6 +2011,24 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) } else { + NodeBitmask nodes = ptr.p->nodes; + nodes.clear(getOwnNodeId()); + if (!nodes.isclear()) + { + BackupFragmentCompleteRep *rep = + (BackupFragmentCompleteRep*)signal->getDataPtrSend(); + rep->backupId = ptr.p->backupId; + rep->backupPtr = ptr.i; + rep->tableId = tableId; + rep->fragmentNo = fragmentNo; + rep->noOfTableRowsLow = (Uint32)(tabPtr.p->noOfRecords & 0xFFFFFFFF); + rep->noOfTableRowsHigh = (Uint32)(tabPtr.p->noOfRecords >> 32); + rep->noOfFragmentRowsLow = (Uint32)(noOfRecords & 0xFFFFFFFF); + rep->noOfFragmentRowsHigh = (Uint32)(noOfRecords >> 32); + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_BACKUP_FRAGMENT_COMPLETE_REP, signal, + BackupFragmentCompleteRep::SignalLength, JBB); + } nextFragment(signal, ptr); } } @@ -1906,6 +2091,29 @@ err: execABORT_BACKUP_ORD(signal); } +void +Backup::execBACKUP_FRAGMENT_COMPLETE_REP(Signal* signal) +{ + jamEntry(); + BackupFragmentCompleteRep * rep = + (BackupFragmentCompleteRep*)signal->getDataPtr(); + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, rep->backupPtr); + + TablePtr tabPtr; + ndbrequire(findTable(ptr, tabPtr, rep->tableId)); + + tabPtr.p->noOfRecords = + rep->noOfTableRowsLow + (((Uint64)rep->noOfTableRowsHigh) << 32); + + FragmentPtr fragPtr; + tabPtr.p->fragments.getPtr(fragPtr, rep->fragmentNo); + + fragPtr.p->noOfRecords = + rep->noOfFragmentRowsLow + (((Uint64)rep->noOfFragmentRowsHigh) 
<< 32); +} + /***************************************************************************** * * Master functionallity - Drop triggers @@ -2106,8 +2314,10 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) rep->senderData = ptr.p->clientData; rep->startGCP = ptr.p->startGCP; rep->stopGCP = ptr.p->stopGCP; - rep->noOfBytes = ptr.p->noOfBytes; - rep->noOfRecords = ptr.p->noOfRecords; + rep->noOfBytesLow = (Uint32)(ptr.p->noOfBytes & 0xFFFFFFFF); + rep->noOfRecordsLow = (Uint32)(ptr.p->noOfRecords & 0xFFFFFFFF); + rep->noOfBytesHigh = (Uint32)(ptr.p->noOfBytes >> 32); + rep->noOfRecordsHigh = (Uint32)(ptr.p->noOfRecords >> 32); rep->noOfLogBytes = ptr.p->noOfLogBytes; rep->noOfLogRecords = ptr.p->noOfLogRecords; rep->nodes = ptr.p->nodes; @@ -2115,17 +2325,19 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) BackupCompleteRep::SignalLength, JBB); } - signal->theData[0] = EventReport::BackupCompleted; + signal->theData[0] = NDB_LE_BackupCompleted; signal->theData[1] = ptr.p->clientRef; signal->theData[2] = ptr.p->backupId; signal->theData[3] = ptr.p->startGCP; signal->theData[4] = ptr.p->stopGCP; - signal->theData[5] = ptr.p->noOfBytes; - signal->theData[6] = ptr.p->noOfRecords; + signal->theData[5] = (Uint32)(ptr.p->noOfBytes & 0xFFFFFFFF); + signal->theData[6] = (Uint32)(ptr.p->noOfRecords & 0xFFFFFFFF); signal->theData[7] = ptr.p->noOfLogBytes; signal->theData[8] = ptr.p->noOfLogRecords; ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9); - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB); + signal->theData[9+NdbNodeBitmask::Size] = (Uint32)(ptr.p->noOfBytes >> 32); + signal->theData[10+NdbNodeBitmask::Size] = (Uint32)(ptr.p->noOfRecords >> 32); + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 11+NdbNodeBitmask::Size, JBB); } else { @@ -2160,7 +2372,7 @@ Backup::masterAbort(Signal* signal, BackupRecordPtr ptr) sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal, 
BackupAbortRep::SignalLength, JBB); } - signal->theData[0] = EventReport::BackupAborted; + signal->theData[0] = NDB_LE_BackupAborted; signal->theData[1] = ptr.p->clientRef; signal->theData[2] = ptr.p->backupId; signal->theData[3] = ptr.p->errorCode; @@ -2324,7 +2536,6 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) }; const Uint32 maxInsert[] = { 2048, // Temporarily to solve TR515 - //25, // 100 bytes 4096, // 4k 16*3000, // Max 16 tuples }; @@ -2889,8 +3100,7 @@ Backup::parseTableDescription(Signal* signal, BackupRecordPtr ptr, Uint32 len) /** * Initialize table object */ - tabPtr.p->frag_mask = RNIL; - + tabPtr.p->noOfRecords = 0; tabPtr.p->schemaVersion = tmpTab.TableVersion; tabPtr.p->noOfAttributes = tmpTab.NoOfAttributes; tabPtr.p->noOfNull = 0; @@ -2983,7 +3193,6 @@ Backup::execDI_FCOUNTCONF(Signal* signal) ndbrequire(findTable(ptr, tabPtr, tableId)); ndbrequire(tabPtr.p->fragments.seize(fragCount) != false); - tabPtr.p->frag_mask = calculate_frag_mask(fragCount); for(Uint32 i = 0; i<fragCount; i++) { jam(); FragmentPtr fragPtr; @@ -3599,8 +3808,10 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) conf->backupPtr = ptr.i; conf->tableId = filePtr.p->tableId; conf->fragmentNo = filePtr.p->fragmentNo; - conf->noOfRecords = op.noOfRecords; - conf->noOfBytes = op.noOfBytes; + conf->noOfRecordsLow = (Uint32)(op.noOfRecords & 0xFFFFFFFF); + conf->noOfRecordsHigh = (Uint32)(op.noOfRecords >> 32); + conf->noOfBytesLow = (Uint32)(op.noOfBytes & 0xFFFFFFFF); + conf->noOfBytesHigh = (Uint32)(op.noOfBytes >> 32); sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal, BackupFragmentConf::SignalLength, JBB); @@ -3801,15 +4012,6 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) * Slave functionallity: Perform logging * ****************************************************************************/ -Uint32 -Backup::calculate_frag_mask(Uint32 count) -{ - Uint32 mask = 1; - while (mask < count) mask <<= 1; - mask -= 1; - return mask; -} - 
void Backup::execBACKUP_TRIG_REQ(Signal* signal) { @@ -3826,14 +4028,6 @@ Backup::execBACKUP_TRIG_REQ(Signal* signal) jamEntry(); c_triggerPool.getPtr(trigPtr, trigger_id); c_tablePool.getPtr(tabPtr, trigPtr.p->tab_ptr_i); - frag_id = frag_id & tabPtr.p->frag_mask; - /* - At the moment the fragment identity known by TUP is the - actual fragment id but with possibly an extra bit set. - This is due to that ACC splits the fragment. Thus fragment id 5 can - here be either 5 or 13. Thus masking with 2 ** n - 1 where number of - fragments <= 2 ** n will always provide a correct fragment id. - */ tabPtr.p->fragments.getPtr(fragPtr, frag_id); if (fragPtr.p->node != getOwnNodeId()) { jam(); @@ -4044,20 +4238,18 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) gcp->StartGCP = htonl(startGCP); gcp->StopGCP = htonl(stopGCP - 1); filePtr.p->operation.dataBuffer.updateWritePtr(gcpSz); - } - { - TablePtr tabPtr; - for(ptr.p->tables.first(tabPtr); tabPtr.i != RNIL; - ptr.p->tables.next(tabPtr)) { - signal->theData[0] = tabPtr.p->tableId; - signal->theData[1] = 0; // unlock - EXECUTE_DIRECT(DBDICT, GSN_BACKUP_FRAGMENT_REQ, signal, 2); + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); + + signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO; + signal->theData[1] = ptr.i; + signal->theData[2] = tabPtr.i; + signal->theData[3] = 0; + sendSignal(BACKUP_REF, GSN_CONTINUEB, signal, 4, JBB); } } - - closeFiles(signal, ptr); } void diff --git a/ndb/src/kernel/blocks/backup/Backup.hpp b/ndb/src/kernel/blocks/backup/Backup.hpp index f3d180b9467..e37923da749 100644 --- a/ndb/src/kernel/blocks/backup/Backup.hpp +++ b/ndb/src/kernel/blocks/backup/Backup.hpp @@ -46,6 +46,7 @@ public: protected: void execSTTOR(Signal* signal); + void execREAD_CONFIG_REQ(Signal* signal); void execDUMP_STATE_ORD(Signal* signal); void execREAD_NODESCONF(Signal* signal); void execNODE_FAILREP(Signal* signal); @@ -67,6 +68,7 @@ protected: void execBACKUP_DATA(Signal* signal); void execSTART_BACKUP_REQ(Signal* 
signal); void execBACKUP_FRAGMENT_REQ(Signal* signal); + void execBACKUP_FRAGMENT_COMPLETE_REP(Signal* signal); void execSTOP_BACKUP_REQ(Signal* signal); void execBACKUP_STATUS_REQ(Signal* signal); void execABORT_BACKUP_ORD(Signal* signal); @@ -182,10 +184,12 @@ public: typedef Ptr<Attribute> AttributePtr; struct Fragment { + Uint64 noOfRecords; Uint32 tableId; - Uint32 node; - Uint16 scanned; // 0 = not scanned x = scanned by node x - Uint16 scanning; // 0 = not scanning x = scanning on node x + Uint8 node; + Uint8 scanned; // 0 = not scanned x = scanned by node x + Uint8 scanning; // 0 = not scanning x = scanning on node x + Uint8 unused1; Uint32 nextPool; }; typedef Ptr<Fragment> FragmentPtr; @@ -193,9 +197,10 @@ public: struct Table { Table(ArrayPool<Attribute> &, ArrayPool<Fragment> &); + Uint64 noOfRecords; + Uint32 tableId; Uint32 schemaVersion; - Uint32 frag_mask; Uint32 tableType; Uint32 noOfNull; Uint32 noOfAttributes; @@ -269,8 +274,8 @@ public: Uint32 tablePtr; // Ptr.i to current table FsBuffer dataBuffer; - Uint32 noOfRecords; - Uint32 noOfBytes; + Uint64 noOfRecords; + Uint64 noOfBytes; Uint32 maxRecordSize; private: @@ -527,8 +532,6 @@ public: ArrayPool<Node> c_nodePool; ArrayPool<TriggerRecord> c_triggerPool; - Uint32 calculate_frag_mask(Uint32); - void checkFile(Signal*, BackupFilePtr); void checkScan(Signal*, BackupFilePtr); void fragmentCompleted(Signal*, BackupFilePtr); diff --git a/ndb/src/kernel/blocks/backup/BackupFormat.hpp b/ndb/src/kernel/blocks/backup/BackupFormat.hpp index 65dd2ad9053..b8ffff3a294 100644 --- a/ndb/src/kernel/blocks/backup/BackupFormat.hpp +++ b/ndb/src/kernel/blocks/backup/BackupFormat.hpp @@ -32,7 +32,8 @@ struct BackupFormat { FRAGMENT_FOOTER = 3, TABLE_LIST = 4, TABLE_DESCRIPTION = 5, - GCP_ENTRY = 6 + GCP_ENTRY = 6, + FRAGMENT_INFO = 7 }; struct FileHeader { @@ -126,6 +127,20 @@ struct BackupFormat { Uint32 StartGCP; Uint32 StopGCP; }; + + /** + * Fragment Info + */ + struct FragmentInfo { + Uint32 SectionType; + 
Uint32 SectionLength; + Uint32 TableId; + Uint32 FragmentNo; + Uint32 NoOfRecordsLow; + Uint32 NoOfRecordsHigh; + Uint32 FilePosLow; + Uint32 FilePosHigh; + }; }; /** diff --git a/ndb/src/kernel/blocks/backup/BackupInit.cpp b/ndb/src/kernel/blocks/backup/BackupInit.cpp index dfda31e9b48..96c11468939 100644 --- a/ndb/src/kernel/blocks/backup/BackupInit.cpp +++ b/ndb/src/kernel/blocks/backup/BackupInit.cpp @@ -34,90 +34,10 @@ Backup::Backup(const Configuration & conf) : { BLOCK_CONSTRUCTOR(Backup); - c_nodePool.setSize(MAX_NDB_NODES); c_masterNodeId = getOwnNodeId(); - const ndb_mgm_configuration_iterator * p = conf.getOwnConfigIterator(); - ndbrequire(p != 0); - - Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0; - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &m_diskless)); - ndb_mgm_get_int_parameter(p, CFG_DB_PARALLEL_BACKUPS, &noBackups); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DICT_TABLE, &noTables)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DICT_ATTRIBUTE, &noAttribs)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT, &noFrags)); - - noAttribs++; //RT 527 bug fix - - c_backupPool.setSize(noBackups); - c_backupFilePool.setSize(3 * noBackups); - c_tablePool.setSize(noBackups * noTables); - c_attributePool.setSize(noBackups * noAttribs); - c_triggerPool.setSize(noBackups * 3 * noTables); - - c_fragmentPool.setSize(noBackups * noFrags); - - Uint32 szMem = 0; - ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_MEM, &szMem); - Uint32 noPages = (szMem + sizeof(Page32) - 1) / sizeof(Page32); - // We need to allocate an additional of 2 pages. 1 page because of a bug in - // ArrayPool and another one for DICTTAINFO. 
- c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2); - - Uint32 szDataBuf = (2 * 1024 * 1024); - Uint32 szLogBuf = (2 * 1024 * 1024); - Uint32 szWrite = 32768, maxWriteSize = (256 * 1024); - ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_DATA_BUFFER_MEM, &szDataBuf); - ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_LOG_BUFFER_MEM, &szLogBuf); - ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_WRITE_SIZE, &szWrite); - ndb_mgm_get_int_parameter(p, CFG_DB_BACKUP_MAX_WRITE_SIZE, &maxWriteSize); - - c_defaults.m_logBufferSize = szLogBuf; - c_defaults.m_dataBufferSize = szDataBuf; - c_defaults.m_minWriteSize = szWrite; - c_defaults.m_maxWriteSize = maxWriteSize; - - { // Init all tables - ArrayList<Table> tables(c_tablePool); - TablePtr ptr; - while(tables.seize(ptr)){ - new (ptr.p) Table(c_attributePool, c_fragmentPool); - } - tables.release(); - } - - { - ArrayList<BackupFile> ops(c_backupFilePool); - BackupFilePtr ptr; - while(ops.seize(ptr)){ - new (ptr.p) BackupFile(* this, c_pagePool); - } - ops.release(); - } - - { - ArrayList<BackupRecord> recs(c_backupPool); - BackupRecordPtr ptr; - while(recs.seize(ptr)){ - new (ptr.p) BackupRecord(* this, c_pagePool, c_tablePool, - c_backupFilePool, c_triggerPool); - } - recs.release(); - } - - // Initialize BAT for interface to file system - { - Page32Ptr p; - ndbrequire(c_pagePool.seizeId(p, 0)); - c_startOfPages = (Uint32 *)p.p; - c_pagePool.release(p); - - NewVARIABLE* bat = allocateBat(1); - bat[0].WA = c_startOfPages; - bat[0].nrr = c_pagePool.getSize()*sizeof(Page32)/sizeof(Uint32); - } - // Add received signals + addRecSignal(GSN_READ_CONFIG_REQ, &Backup::execREAD_CONFIG_REQ); addRecSignal(GSN_STTOR, &Backup::execSTTOR); addRecSignal(GSN_DUMP_STATE_ORD, &Backup::execDUMP_STATE_ORD); addRecSignal(GSN_READ_NODESCONF, &Backup::execREAD_NODESCONF); @@ -177,6 +97,9 @@ Backup::Backup(const Configuration & conf) : addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ); addRecSignal(GSN_BACKUP_FRAGMENT_REF, 
&Backup::execBACKUP_FRAGMENT_REF); addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF); + + addRecSignal(GSN_BACKUP_FRAGMENT_COMPLETE_REP, + &Backup::execBACKUP_FRAGMENT_COMPLETE_REP); addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ); addRecSignal(GSN_STOP_BACKUP_REF, &Backup::execSTOP_BACKUP_REF); diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index f9290a75afb..d2f9150ade0 100644 --- a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -72,6 +72,7 @@ Cmvmi::Cmvmi(const Configuration & conf) : addRecSignal(GSN_SET_LOGLEVELORD, &Cmvmi::execSET_LOGLEVELORD); addRecSignal(GSN_EVENT_REP, &Cmvmi::execEVENT_REP); addRecSignal(GSN_STTOR, &Cmvmi::execSTTOR); + addRecSignal(GSN_READ_CONFIG_REQ, &Cmvmi::execREAD_CONFIG_REQ); addRecSignal(GSN_CLOSE_COMREQ, &Cmvmi::execCLOSE_COMREQ); addRecSignal(GSN_ENABLE_COMORD, &Cmvmi::execENABLE_COMORD); addRecSignal(GSN_OPEN_COMREQ, &Cmvmi::execOPEN_COMREQ); @@ -191,7 +192,13 @@ void Cmvmi::execEVENT_REP(Signal* signal) // to the graphical management interface. 
//----------------------------------------------------------------------- EventReport * const eventReport = (EventReport *)&signal->theData[0]; - EventReport::EventType eventType = eventReport->getEventType(); + Ndb_logevent_type eventType = eventReport->getEventType(); + Uint32 nodeId= eventReport->getNodeId(); + if (nodeId == 0) + { + nodeId= refToNode(signal->getSendersBlockRef()); + eventReport->setNodeId(nodeId); + } jamEntry(); @@ -201,7 +208,8 @@ void Cmvmi::execEVENT_REP(Signal* signal) Uint32 threshold; LogLevel::EventCategory eventCategory; Logger::LoggerLevel severity; - if (EventLoggerBase::event_lookup(eventType,eventCategory,threshold,severity)) + EventLoggerBase::EventTextFunction textF; + if (EventLoggerBase::event_lookup(eventType,eventCategory,threshold,severity,textF)) return; SubscriberPtr ptr; @@ -304,6 +312,27 @@ void Cmvmi::sendSTTORRY(Signal* signal) }//Cmvmi::sendSTTORRY +void +Cmvmi::execREAD_CONFIG_REQ(Signal* signal) +{ + jamEntry(); + + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); +} + void Cmvmi::execSTTOR(Signal* signal) { Uint32 theStartPhase = signal->theData[1]; @@ -366,7 +395,7 @@ void Cmvmi::execCLOSE_COMREQ(Signal* signal) //----------------------------------------------------- // Report that the connection to the node is closed //----------------------------------------------------- - signal->theData[0] = EventReport::CommunicationClosed; + signal->theData[0] = NDB_LE_CommunicationClosed; signal->theData[1] = i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -405,7 +434,7 @@ void 
Cmvmi::execOPEN_COMREQ(Signal* signal) //----------------------------------------------------- // Report that the connection to the node is opened //----------------------------------------------------- - signal->theData[0] = EventReport::CommunicationOpened; + signal->theData[0] = NDB_LE_CommunicationOpened; signal->theData[1] = tStartingNode; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); //----------------------------------------------------- @@ -424,7 +453,7 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) globalTransporterRegistry.do_connect(i); globalTransporterRegistry.setIOState(i, HaltIO); - signal->theData[0] = EventReport::CommunicationOpened; + signal->theData[0] = NDB_LE_CommunicationOpened; signal->theData[1] = i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); } @@ -449,7 +478,7 @@ void Cmvmi::execENABLE_COMORD(Signal* signal) //----------------------------------------------------- // Report that the version of the node //----------------------------------------------------- - signal->theData[0] = EventReport::ConnectedApiVersion; + signal->theData[0] = NDB_LE_ConnectedApiVersion; signal->theData[1] = tStartingNode; signal->theData[2] = getNodeInfo(tStartingNode).m_version; @@ -491,7 +520,7 @@ void Cmvmi::execDISCONNECT_REP(Signal *signal) cancelSubscription(hostId); - signal->theData[0] = EventReport::Disconnected; + signal->theData[0] = NDB_LE_Disconnected; signal->theData[1] = hostId; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); } @@ -541,7 +570,7 @@ void Cmvmi::execCONNECT_REP(Signal *signal){ //------------------------------------------ // Also report this event to the Event handler //------------------------------------------ - signal->theData[0] = EventReport::Connected; + signal->theData[0] = NDB_LE_Connected; signal->theData[1] = hostId; signal->header.theLength = 2; diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.hpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.hpp index 1c91f564749..f89c8f15e86 100644 --- 
a/ndb/src/kernel/blocks/cmvmi/Cmvmi.hpp +++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.hpp @@ -48,6 +48,7 @@ private: void execNDB_TAMPER(Signal* signal); void execSET_LOGLEVELORD(Signal* signal); void execEVENT_REP(Signal* signal); + void execREAD_CONFIG_REQ(Signal* signal); void execSTTOR(Signal* signal); void execCLOSE_COMREQ(Signal* signal); void execENABLE_COMORD(Signal* signal); diff --git a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp index ea866aafff9..7f51a281f37 100644 --- a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp +++ b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp @@ -22,6 +22,9 @@ #include <pc.hpp> #include <SimulatedBlock.hpp> +// primary key is stored in TUP +#include <Dbtup.hpp> + #ifdef DBACC_C // Debug Macros #define dbgWord32(ptr, ind, val) @@ -98,7 +101,6 @@ ndbout << "Ptr: " << ptr.p->word32 << " \tIndex: " << tmp_string << " \tValue: " #define ZPOS_PREV_PAGE 11 #define ZNORMAL_PAGE_TYPE 0 #define ZOVERFLOW_PAGE_TYPE 1 -#define ZLONG_PAGE_TYPE 2 #define ZDEFAULT_LIST 3 #define ZWORDS_IN_PAGE 2048 /* --------------------------------------------------------------------------------- */ @@ -132,16 +134,6 @@ ndbout << "Ptr: " << ptr.p->word32 << " \tIndex: " << tmp_string << " \tValue: " #define ZPAGEZERO_NODETYPE 33 #define ZPAGEZERO_SLACK_CHECK 34 /* --------------------------------------------------------------------------------- */ -/* CONSTANTS FOR THE LONG KEY PAGES */ -/* --------------------------------------------------------------------------------- */ -/* --------------------------------------------------------------------------------- */ -// Maximum number of elements in long key page = (ZWORDS_IN_PAGE - ZHEAD_SIZE) / -// (MinKeySize + IndexSize) = (2048 - 32) / (8 + 1) = 224. MinKeySize is actually 9 -// because 8 is the largest normal key size. 
-#define ZMAX_NO_OF_LONGKEYS_IN_PAGE 225 -#define ZMAX_LONG_KEY_ARRAY_INDEX 3 -#define ZACTIVE_LONG_KEY_LEN 1 -/* --------------------------------------------------------------------------------- */ /* CONSTANTS IN ALPHABETICAL ORDER */ /* --------------------------------------------------------------------------------- */ #define ZADDFRAG 0 @@ -392,49 +384,6 @@ enum State { // Records - -//---------------------------------------------------------------------------------- -// LONGKEY PAGE RECORD -// -// A long key page consist of a header part, a key data part and an index part. The -// page starts with a header of size HEAD_SIZE. As you can see below, not every word -// in the header is used. After the header comes the data part, where the actual -// keys are stored. A key is always inserted after the existing keys in the data -// part. If we have a fragmented data part and a new key doesn't fit after the -// existing keys we reorganize the keys. The index part starts at the end of the -// page and grows towards the end of the data part. This means that the limit -// between the data part and the index part is floating. Each inserted key have a -// word in the index part that describes size and position of the key in the data -// part. The free indexes in the index part are single linked. -//---------------------------------------------------------------------------------- - union LongKeyPage { - struct { - Uint32 pageId; // ZPOS_PAGE_ID 0 - Uint32 b; - // The number of keys in page. - Uint32 noOfElements; // ZPOS_NO_ELEM_IN_PAGE 2 - Uint32 d; - Uint32 e; - // The free area in the data part of page. - Uint32 freeArea; // ZPOS_FREE_AREA_IN_PAGE 5 - // The index position, which defines the limit between the data and the index part. - Uint32 highestIndex; // ZPOS_LAST_INDEX 6 - // The position where to insert the actual key in the data part. - Uint32 insertPos; // ZPOS_INSERT_INDEX 7 - // Position in a page array where the pages are stored in a double linked list. 
- // Based on the free area in the page. Values 0 to 3. - Uint32 pageArrayPos; // ZPOS_ARRAY_POS 8 - // Next free position in the index part. - Uint32 nextFreeIndex; // ZPOS_NEXT_FREE_INDEX 9 - // Next page in the double linked list. - Uint32 nextPage; // ZPOS_NEXT_PAGE 10 - // Previous page in the double linked list. - Uint32 prevPage; // ZPOS_PREV_PAGE 11 - } header; - // This is kept to keep the logic and to make changes to a minimum. - Uint32 word32[2048]; - }; - /* --------------------------------------------------------------------------------- */ /* UNDO HEADER RECORD */ /* --------------------------------------------------------------------------------- */ @@ -444,9 +393,7 @@ enum State { ZPAGE_INFO = 0, ZOVER_PAGE_INFO = 1, ZOP_INFO = 2, - ZUNDO_INSERT_LONG_KEY = 3, - ZUNDO_DELETE_LONG_KEY = 4, - ZNO_UNDORECORD_TYPES = 5 + ZNO_UNDORECORD_TYPES = 3 }; UintR tableId; UintR rootFragId; @@ -660,10 +607,10 @@ struct Fragmentrec { //----------------------------------------------------------------------------- // elementLength: Length of element in bucket and overflow pages -// keyLength: Length of key (== 0 if long key or variable key length) +// keyLength: Length of key //----------------------------------------------------------------------------- Uint8 elementLength; - Uint8 keyLength; + Uint16 keyLength; //----------------------------------------------------------------------------- // This flag is used to avoid sending a big number of expand or shrink signals @@ -689,6 +636,11 @@ struct Fragmentrec { //----------------------------------------------------------------------------- Uint8 nodetype; Uint8 stopQueOp; + +//----------------------------------------------------------------------------- +// flag to avoid accessing table record if no char attributes +//----------------------------------------------------------------------------- + Uint8 hasCharAttr; }; typedef Ptr<Fragmentrec> FragmentrecPtr; @@ -771,6 +723,7 @@ struct Operationrec { State 
transactionstate; Uint16 elementContainer; Uint16 tupkeylen; + Uint32 xfrmtupkeylen; Uint32 userblockref; Uint32 scanBits; Uint8 elementIsDisappeared; @@ -783,7 +736,7 @@ struct Operationrec { Uint8 dirtyRead; Uint8 commitDeleteCheckFlag; Uint8 isAccLockReq; - Uint32 nextOpList; + Uint8 isUndoLogReq; }; /* p2c: size = 168 bytes */ typedef Ptr<Operationrec> OperationrecPtr; @@ -914,6 +867,9 @@ public: Dbacc(const class Configuration &); virtual ~Dbacc(); + // pointer to TUP instance in this thread + Dbtup* c_tup; + private: BLOCK_DEFINES(Dbacc); @@ -972,10 +928,8 @@ private: void initFragGeneral(FragmentrecPtr); void verifyFragCorrect(FragmentrecPtr regFragPtr); void sendFSREMOVEREQ(Signal* signal, Uint32 tableId); - void sendDROP_TABFILECONF(Signal* signal, TabrecPtr tabPtr); void releaseFragResources(Signal* signal, Uint32 fragIndex); void releaseRootFragRecord(Signal* signal, RootfragmentrecPtr rootPtr); - void sendREL_TABMEMCONF(Signal* signal, TabrecPtr tabPtr); void releaseRootFragResources(Signal* signal, Uint32 tableId); void releaseDirResources(Signal* signal, Uint32 fragIndex, @@ -1051,7 +1005,6 @@ private: void releaseScanRec(Signal* signal); bool searchScanContainer(Signal* signal); void sendNextScanConf(Signal* signal); - void sendScaninfo(Signal* signal); void setlock(Signal* signal); void takeOutActiveScanOp(Signal* signal); void takeOutScanLockQueue(Uint32 scanRecIndex); @@ -1063,15 +1016,8 @@ private: void increaselistcont(Signal* signal); void seizeLeftlist(Signal* signal); void seizeRightlist(Signal* signal); - void allocLongOverflowPage(Signal* signal); - void allocSpecificLongOverflowPage(Signal* signal); - void getLongKeyPage(Signal* signal); - void initLongOverpage(Signal* signal); - void storeLongKeys(Signal* signal); - void storeLongKeysAtPos(Signal* signal); - void reorgLongPage(Signal* signal); + Uint32 readTablePk(Uint32 localkey1); void getElement(Signal* signal); - void searchLongKey(Signal* signal, bool verify); void 
getdirindex(Signal* signal); void commitdelete(Signal* signal, bool systemRestart); void deleteElement(Signal* signal); @@ -1079,15 +1025,6 @@ private: void releaseLeftlist(Signal* signal); void releaseRightlist(Signal* signal); void checkoverfreelist(Signal* signal); - void deleteLongKey(Signal* signal); - void removeFromPageArrayList(Signal* signal); - void insertPageArrayList(Signal* signal); - void checkPageArrayList(Signal* signal, const char *); - void checkPageB4Insert(Uint32, const char *); - void checkPageB4Remove(Uint32, const char *); - void checkIndexInLongKeyPage(Uint32, const char *); - void printoutInfoAndShutdown(LongKeyPage *); - void releaseLongPage(Signal* signal); void abortOperation(Signal* signal); void accAbortReqLab(Signal* signal, bool sendConf); void commitOperation(Signal* signal); @@ -1105,7 +1042,6 @@ private: void initLcpConnRec(Signal* signal); void initOverpage(Signal* signal); void initPage(Signal* signal); - void initPageZero(Signal* signal); void initRootfragrec(Signal* signal); void putOpInFragWaitQue(Signal* signal); void putOverflowRecInFrag(Signal* signal); @@ -1135,7 +1071,7 @@ private: void seizeRootfragrec(Signal* signal); void seizeScanRec(Signal* signal); void seizeSrVerRec(Signal* signal); - void sendSystemerror(Signal* signal); + void sendSystemerror(Signal* signal, int line); void takeRecOutOfFreeOverdir(Signal* signal); void takeRecOutOfFreeOverpage(Signal* signal); void sendScanHbRep(Signal* signal, Uint32); @@ -1159,8 +1095,6 @@ private: void refaccConnectLab(Signal* signal); void srReadOverPagesLab(Signal* signal); void releaseScanLab(Signal* signal); - void exeoperationLab(Signal* signal); - void saveKeyDataLab(Signal* signal); void lcpOpenUndofileConfLab(Signal* signal); void srFsOpenConfLab(Signal* signal); void checkSyncUndoPagesLab(Signal* signal); @@ -1172,13 +1106,12 @@ private: void srReadPagesLab(Signal* signal); void srDoUndoLab(Signal* signal); void ndbrestart1Lab(Signal* signal); - void 
initialiseRecordsLab(Signal* signal, Uint32 returnRef, Uint32 retData); + void initialiseRecordsLab(Signal* signal, Uint32 ref, Uint32 data); void srReadPagesAllocLab(Signal* signal); void checkNextBucketLab(Signal* signal); void endsavepageLab(Signal* signal); void saveZeroPageLab(Signal* signal); void srAllocPage0011Lab(Signal* signal); - void allocscanrecLab(Signal* signal); void sendLcpFragidconfLab(Signal* signal); void savepagesLab(Signal* signal); void saveOverPagesLab(Signal* signal); @@ -1192,6 +1125,8 @@ private: void lcp_write_op_to_undolog(Signal* signal); void reenable_expand_after_redo_log_exection_complete(Signal*); + // charsets + void xfrmKeyData(Signal* signal); // Initialisation void initData(); @@ -1281,8 +1216,6 @@ private: /* --------------------------------------------------------------------------------- */ Page8 *page8; /* 8 KB PAGE */ - Page8Ptr aslpPageptr; - Page8Ptr alpPageptr; Page8Ptr ancPageptr; Page8Ptr colPageptr; Page8Ptr ccoPageptr; @@ -1293,29 +1226,17 @@ private: Page8Ptr gdiPageptr; Page8Ptr gePageptr; Page8Ptr gflPageptr; - Page8Ptr glkPageptr; Page8Ptr idrPageptr; Page8Ptr ilcPageptr; - Page8Ptr iloPageptr; Page8Ptr inpPageptr; Page8Ptr iopPageptr; - Page8Ptr ipzPageptr; Page8Ptr lastPageptr; Page8Ptr lastPrevpageptr; Page8Ptr lcnPageptr; Page8Ptr lcnCopyPageptr; Page8Ptr lupPageptr; - Page8Ptr dlkPageptr; - Page8Ptr ipaPagePtr; Page8Ptr priPageptr; Page8Ptr pwiPageptr; - Page8Ptr rfpPageptr; - Page8Ptr relpPageptr; - Page8Ptr rlopPageptr; - Page8Ptr slkPageptr; - Page8Ptr slkCopyPageptr; - Page8Ptr slkapPageptr; - Page8Ptr slkapCopyPageptr; Page8Ptr ciPageidptr; Page8Ptr gsePageidptr; Page8Ptr isoPageptr; @@ -1329,7 +1250,6 @@ private: Page8Ptr ropPageptr; Page8Ptr rpPageptr; Page8Ptr slPageptr; - Page8Ptr slpPageptr; Page8Ptr spPageptr; Uint32 cfirstfreepage; Uint32 cfreepage; @@ -1347,7 +1267,6 @@ private: /* --------------------------------------------------------------------------------- */ Rootfragmentrec 
*rootfragmentrec; RootfragmentrecPtr rootfragrecptr; - RootfragmentrecPtr tmprootfrgptr; Uint32 crootfragmentsize; Uint32 cfirstfreerootfrag; /* --------------------------------------------------------------------------------- */ @@ -1380,7 +1299,6 @@ private: Uint32 tpriElementptr; Uint32 tgseElementptr; Uint32 tgseContainerptr; - Uint32 tiloIndex; Uint32 trlHead; Uint32 trlRelCon; Uint32 trlNextused; @@ -1389,20 +1307,12 @@ private: Uint32 tlupElemIndex; Uint32 tlupIndex; Uint32 tlupForward; - Uint32 tslkPageIndex; - Uint32 tslkKeyLen; - Uint32 tslkapKeyLen; - Uint32 tslkapPageIndex; - Uint32 tipaArrayPos; - Uint32 trfpArrayPos; - Uint32 tdlkLogicalPageIndex; Uint32 tancNext; Uint32 tancBufType; Uint32 tancContainerptr; Uint32 tancPageindex; Uint32 tancPageid; Uint32 tidrResult; - Uint32 tidrKeyLen; Uint32 tidrElemhead; Uint32 tidrForward; Uint32 tidrPageindex; @@ -1420,15 +1330,11 @@ private: Uint32 tdelForward; Uint32 tiopPageId; Uint32 tipPageId; - Uint32 ttupKeyLength; Uint32 tgeLocked; Uint32 tgeResult; Uint32 tgeContainerptr; Uint32 tgeElementptr; Uint32 tgeForward; - Uint32 tslcResult; - Uint32 tslcPagedir; - Uint32 tslcPageIndex; Uint32 tundoElemIndex; Uint32 texpReceivedBucket; Uint32 texpDirInd; @@ -1453,7 +1359,6 @@ private: Uint32 tscanFlag; Uint32 theadundoindex; Uint32 tgflBufType; - Uint32 thashvalue; Uint32 tgseIsforward; Uint32 tsscIsforward; Uint32 trscIsforward; @@ -1462,21 +1367,10 @@ private: Uint32 tisoIsforward; Uint32 tgseIsLocked; Uint32 tsscIsLocked; - Uint32 tkey1; - Uint32 tkey2; - Uint32 tkey3; - Uint32 tkey4; Uint32 tkeylen; - Uint32 tkSize; - Uint32 tlhfragbits; - Uint32 tlhdirbits; - Uint32 tlocalkeylen; - Uint32 tmaxloadfactor; - Uint32 tminloadfactor; Uint32 tmp; Uint32 tmpP; Uint32 tmpP2; - Uint32 taslpDirIndex; Uint32 tmp1; Uint32 tmp2; Uint32 tgflPageindex; @@ -1490,9 +1384,6 @@ private: Uint32 trsbPageindex; Uint32 tnciPageindex; Uint32 tlastPrevconptr; - Uint32 treqinfo; - Uint32 transactionid1; - Uint32 transactionid2; 
Uint32 tresult; Uint32 tslUpdateHeader; Uint32 tuserptr; @@ -1505,15 +1396,12 @@ private: Uint32 tgdiPageindex; Uint32 tiopIndex; Uint32 tnciTmp; - Uint32 tlenKeyinfo; Uint32 tullIndex; Uint32 turlIndex; Uint32 tlfrTmp1; Uint32 tlfrTmp2; - Uint32 tudqeIndex; Uint32 tscanTrid1; Uint32 tscanTrid2; - Uint32 taccscanTmp; Uint16 clastUndoPageIdWritten; Uint32 cactiveCheckpId; @@ -1557,10 +1445,13 @@ private: Uint32 cexcPrevpageindex; Uint32 cexcPrevforward; Uint32 clocalkey[32]; - Uint32 ckeys[2048]; + union { + Uint32 ckeys[2048 * MAX_XFRM_MULTIPLY]; + Uint64 ckeys_align; + }; Uint32 c_errorInsert3000_TableId; - Uint32 cSrUndoRecords[5]; + Uint32 cSrUndoRecords[UndoHeader::ZNO_UNDORECORD_TYPES]; }; #endif diff --git a/ndb/src/kernel/blocks/dbacc/DbaccInit.cpp b/ndb/src/kernel/blocks/dbacc/DbaccInit.cpp index c98c072cc89..59a622b60e6 100644 --- a/ndb/src/kernel/blocks/dbacc/DbaccInit.cpp +++ b/ndb/src/kernel/blocks/dbacc/DbaccInit.cpp @@ -59,10 +59,24 @@ void Dbacc::initData() void Dbacc::initRecords() { // Records with dynamic sizes + page8 = (Page8*)allocRecord("Page8", + sizeof(Page8), + cpagesize, + false); + + operationrec = (Operationrec*)allocRecord("Operationrec", + sizeof(Operationrec), + coprecsize); + dirRange = (DirRange*)allocRecord("DirRange", sizeof(DirRange), cdirrangesize); + undopage = (Undopage*)allocRecord("Undopage", + sizeof(Undopage), + cundopagesize, + false); + directoryarray = (Directoryarray*)allocRecord("Directoryarray", sizeof(Directoryarray), cdirarraysize); @@ -83,19 +97,10 @@ void Dbacc::initRecords() sizeof(LcpConnectrec), clcpConnectsize); - operationrec = (Operationrec*)allocRecord("Operationrec", - sizeof(Operationrec), - coprecsize); - overflowRecord = (OverflowRecord*)allocRecord("OverflowRecord", sizeof(OverflowRecord), coverflowrecsize); - page8 = (Page8*)allocRecord("Page8", - sizeof(Page8), - cpagesize, - false); - rootfragmentrec = (Rootfragmentrec*)allocRecord("Rootfragmentrec", sizeof(Rootfragmentrec), crootfragmentsize); @@ 
-112,11 +117,6 @@ void Dbacc::initRecords() sizeof(Tabrec), ctablesize); - undopage = (Undopage*)allocRecord("Undopage", - sizeof(Undopage), - cundopagesize, - false); - // Initialize BAT for interface to file system NewVARIABLE* bat = allocateBat(3); @@ -133,27 +133,11 @@ void Dbacc::initRecords() }//Dbacc::initRecords() Dbacc::Dbacc(const class Configuration & conf): - SimulatedBlock(DBACC, conf) + SimulatedBlock(DBACC, conf), + c_tup(0) { - Uint32 log_page_size= 0; BLOCK_CONSTRUCTOR(Dbacc); - const ndb_mgm_configuration_iterator * p = conf.getOwnConfigIterator(); - ndbrequire(p != 0); - - ndb_mgm_get_int_parameter(p, CFG_DB_UNDO_INDEX_BUFFER, - &log_page_size); - - /** - * Always set page size in half MBytes - */ - cundopagesize= (log_page_size / sizeof(Undopage)); - Uint32 mega_byte_part= cundopagesize & 15; - if (mega_byte_part != 0) { - jam(); - cundopagesize+= (16 - mega_byte_part); - } - // Transit signals addRecSignal(GSN_DUMP_STATE_ORD, &Dbacc::execDUMP_STATE_ORD); addRecSignal(GSN_DEBUG_SIG, &Dbacc::execDEBUG_SIG); @@ -197,6 +181,80 @@ Dbacc::Dbacc(const class Configuration & conf): addRecSignal(GSN_SET_VAR_REQ, &Dbacc::execSET_VAR_REQ); initData(); + +#ifdef VM_TRACE + { + void* tmp[] = { &expDirRangePtr, + &gnsDirRangePtr, + &newDirRangePtr, + &rdDirRangePtr, + &nciOverflowrangeptr, + &expDirptr, + &rdDirptr, + &sdDirptr, + &nciOverflowDirptr, + &fragrecptr, + &fsConnectptr, + &fsOpptr, + &lcpConnectptr, + &operationRecPtr, + &idrOperationRecPtr, + &copyInOperPtr, + &copyOperPtr, + &mlpqOperPtr, + &queOperPtr, + &readWriteOpPtr, + &iopOverflowRecPtr, + &tfoOverflowRecPtr, + &porOverflowRecPtr, + &priOverflowRecPtr, + &rorOverflowRecPtr, + &sorOverflowRecPtr, + &troOverflowRecPtr, + &ancPageptr, + &colPageptr, + &ccoPageptr, + &datapageptr, + &delPageptr, + &excPageptr, + &expPageptr, + &gdiPageptr, + &gePageptr, + &gflPageptr, + &idrPageptr, + &ilcPageptr, + &inpPageptr, + &iopPageptr, + &lastPageptr, + &lastPrevpageptr, + &lcnPageptr, + &lcnCopyPageptr, + 
&lupPageptr, + &priPageptr, + &pwiPageptr, + &ciPageidptr, + &gsePageidptr, + &isoPageptr, + &nciPageidptr, + &rsbPageidptr, + &rscPageidptr, + &slPageidptr, + &sscPageidptr, + &rlPageptr, + &rlpPageptr, + &ropPageptr, + &rpPageptr, + &slPageptr, + &spPageptr, + &rootfragrecptr, + &scanPtr, + &srVersionPtr, + &tabptr, + &undopageptr + }; + init_globals_list(tmp, sizeof(tmp)/sizeof(tmp[0])); + } +#endif }//Dbacc::Dbacc() Dbacc::~Dbacc() diff --git a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index a3880e2df1d..261a0acfa81 100644 --- a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -16,6 +16,7 @@ #define DBACC_C #include "Dbacc.hpp" +#include <my_sys.h> #include <AttributeHeader.hpp> #include <signaldata/AccFrag.hpp> @@ -27,6 +28,8 @@ #include <signaldata/FsRemoveReq.hpp> #include <signaldata/DropTab.hpp> #include <signaldata/DumpStateOrd.hpp> +#include <KeyDescriptor.hpp> + // TO_DO_RONM is a label for comments on what needs to be improved in future versions // when more time is given. 
@@ -52,8 +55,7 @@ Dbacc::remainingUndoPages(){ // There can not be more than cundopagesize remaining if (Remaining <= 0){ // No more undolog, crash node - progError(__LINE__, - ERR_NO_MORE_UNDOLOG, + progError(__LINE__, NDBD_EXIT_NO_MORE_UNDOLOG, "There are more than 1Mbyte undolog writes outstanding"); } return Remaining; @@ -532,7 +534,14 @@ void Dbacc::execNDB_STTOR(Signal* signal) void Dbacc::execSTTOR(Signal* signal) { jamEntry(); - // tstartphase = signal->theData[1]; + Uint32 tstartphase = signal->theData[1]; + switch (tstartphase) { + case 1: + jam(); + c_tup = (Dbtup*)globalData.getBlock(DBTUP); + ndbrequire(c_tup != 0); + break; + } tuserblockref = signal->theData[3]; csignalkey = signal->theData[6]; sttorrysignalLab(signal); @@ -669,6 +678,20 @@ void Dbacc::execREAD_CONFIG_REQ(Signal* signal) theConfiguration.getOwnConfigIterator(); ndbrequire(p != 0); + Uint32 log_page_size= 0; + ndb_mgm_get_int_parameter(p, CFG_DB_UNDO_INDEX_BUFFER, + &log_page_size); + + /** + * Always set page size in half MBytes + */ + cundopagesize= (log_page_size / sizeof(Undopage)); + Uint32 mega_byte_part= cundopagesize & 15; + if (mega_byte_part != 0) { + jam(); + cundopagesize+= (16 - mega_byte_part); + } + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_ACC_DIR_RANGE, &cdirrangesize)); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_ACC_DIR_ARRAY, &cdirarraysize)); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_ACC_FRAGMENT, &cfragmentsize)); @@ -1037,7 +1060,7 @@ void Dbacc::execACCFRAGREQ(Signal* signal) // config mismatch - do not crash if release compiled if (tabptr.i >= ctablesize) { jam(); - addFragRefuse(signal, 800); + addFragRefuse(signal, 640); return; } #endif @@ -1119,8 +1142,8 @@ void Dbacc::execACCFRAGREQ(Signal* signal) Uint32 userPtr = req->userPtr; BlockReference retRef = req->userRef; rootfragrecptr.p->rootState = ACTIVEROOT; - AccFragConf * const conf = (AccFragConf*)&signal->theData[0]; + AccFragConf * const conf = (AccFragConf*)&signal->theData[0]; conf->userPtr 
= userPtr; conf->rootFragPtr = rootfragrecptr.i; conf->fragId[0] = rootfragrecptr.p->fragmentid[0]; @@ -1144,6 +1167,7 @@ void Dbacc::addFragRefuse(Signal* signal, Uint32 errorCode) return; }//Dbacc::addFragRefuseEarly() + void Dbacc::execDROP_TAB_REQ(Signal* signal){ jamEntry(); @@ -1503,6 +1527,7 @@ void Dbacc::initOpRec(Signal* signal) operationRecPtr.p->hashValue = signal->theData[3]; operationRecPtr.p->tupkeylen = signal->theData[4]; + operationRecPtr.p->xfrmtupkeylen = signal->theData[4]; operationRecPtr.p->transId1 = signal->theData[5]; operationRecPtr.p->transId2 = signal->theData[6]; operationRecPtr.p->transactionstate = ACTIVE; @@ -1541,6 +1566,9 @@ void Dbacc::initOpRec(Signal* signal) // bit to mark lock operation operationRecPtr.p->isAccLockReq = (Treqinfo >> 31) & 0x1; + + // undo log is not run via ACCKEYREQ + operationRecPtr.p->isUndoLogReq = 0; }//Dbacc::initOpRec() /* --------------------------------------------------------------------------------- */ @@ -1614,6 +1642,10 @@ void Dbacc::execACCKEYREQ(Signal* signal) ndbrequire(operationRecPtr.p->transactionstate == IDLE); initOpRec(signal); + // normalize key if any char attr + if (! 
operationRecPtr.p->isAccLockReq && fragrecptr.p->hasCharAttr) + xfrmKeyData(signal); + /*---------------------------------------------------------------*/ /* */ /* WE WILL USE THE HASH VALUE TO LOOK UP THE PROPER MEMORY */ @@ -1713,6 +1745,19 @@ void Dbacc::execACCKEYREQ(Signal* signal) return; }//Dbacc::execACCKEYREQ() +void +Dbacc::xfrmKeyData(Signal* signal) +{ + Uint32 table = fragrecptr.p->myTableId; + Uint32 dst[MAX_KEY_SIZE_IN_WORDS * MAX_XFRM_MULTIPLY]; + Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX]; + Uint32* src = &signal->theData[7]; + Uint32 len = xfrm_key(table, src, dst, sizeof(dst) >> 2, keyPartLen); + ndbrequire(len); // 0 means error + memcpy(src, dst, len << 2); + operationRecPtr.p->xfrmtupkeylen = len; +} + void Dbacc::accIsLockedLab(Signal* signal) { ndbrequire(csystemRestart == ZFALSE); @@ -1786,8 +1831,6 @@ void Dbacc::insertExistElemLab(Signal* signal) /* --------------------------------------------------------------------------------- */ void Dbacc::insertelementLab(Signal* signal) { - Uint32 tinsKeyLen; - if (fragrecptr.p->createLcp == ZTRUE) { if (remainingUndoPages() < ZMIN_UNDO_PAGES_AT_OPERATION) { jam(); @@ -1805,46 +1848,9 @@ void Dbacc::insertelementLab(Signal* signal) }//if }//if if (fragrecptr.p->keyLength != operationRecPtr.p->tupkeylen) { + // historical ndbrequire(fragrecptr.p->keyLength == 0); }//if - if (fragrecptr.p->keyLength != 0) { - ndbrequire(operationRecPtr.p->tupkeylen <= 8); - for (Uint32 i = 0; i < operationRecPtr.p->tupkeylen; i++) { - jam(); - ckeys[i] = signal->theData[i + 7]; - }//for - tinsKeyLen = operationRecPtr.p->tupkeylen; - } else { - jam(); - seizePage(signal); - if (tresult > ZLIMIT_OF_ERROR) { - jam(); - acckeyref1Lab(signal, tresult); - return; - }//if - operationRecPtr.p->keyinfoPage = spPageptr.i; - for (Uint32 i = 0; i < signal->theData[4]; i++) { - spPageptr.p->word32[i] = signal->theData[i + 7]; - }//for - - getLongKeyPage(signal); - if (tresult > ZLIMIT_OF_ERROR) { - jam(); - acckeyref1Lab(signal, 
tresult); - return; - }//if - slkPageptr = glkPageptr; - slkCopyPageptr.i = operationRecPtr.p->keyinfoPage; - ptrCheckGuard(slkCopyPageptr, cpagesize, page8); - tslkKeyLen = operationRecPtr.p->tupkeylen; - storeLongKeys(signal); - ckeys[0] = (slkPageptr.p->word32[ZPOS_PAGE_ID] << 10) + tslkPageIndex; - tinsKeyLen = ZACTIVE_LONG_KEY_LEN; - rpPageptr.i = operationRecPtr.p->keyinfoPage; - ptrCheckGuard(rpPageptr, cpagesize, page8); - releasePage(signal); - operationRecPtr.p->keyinfoPage = RNIL; - }//if signal->theData[0] = operationRecPtr.p->userptr; Uint32 blockNo = refToBlock(operationRecPtr.p->userblockref); @@ -1868,7 +1874,6 @@ void Dbacc::insertelementLab(Signal* signal) idrPageptr = gdiPageptr; tidrPageindex = tgdiPageindex; tidrForward = ZTRUE; - tidrKeyLen = tinsKeyLen; idrOperationRecPtr = operationRecPtr; clocalkey[0] = localKey; operationRecPtr.p->localdata[0] = localKey; @@ -2314,14 +2319,14 @@ void Dbacc::execACC_COMMITREQ(Signal* signal) operationRecPtr.p->transactionstate = IDLE; operationRecPtr.p->operation = ZUNDEFINED_OP; if(Toperation != ZREAD){ + rootfragrecptr.i = fragrecptr.p->myroot; + ptrCheckGuard(rootfragrecptr, crootfragmentsize, rootfragmentrec); rootfragrecptr.p->m_commit_count++; if (Toperation != ZINSERT) { if (Toperation != ZDELETE) { return; } else { jam(); - rootfragrecptr.i = fragrecptr.p->myroot; - ptrCheckGuard(rootfragrecptr, crootfragmentsize, rootfragmentrec); rootfragrecptr.p->noOfElements--; fragrecptr.p->slack += operationRecPtr.p->insertDeleteLen; if (fragrecptr.p->slack > fragrecptr.p->slackCheck) { @@ -2341,8 +2346,6 @@ void Dbacc::execACC_COMMITREQ(Signal* signal) }//if } else { jam(); /* EXPAND PROCESS HANDLING */ - rootfragrecptr.i = fragrecptr.p->myroot; - ptrCheckGuard(rootfragrecptr, crootfragmentsize, rootfragmentrec); rootfragrecptr.p->noOfElements++; fragrecptr.p->slack -= operationRecPtr.p->insertDeleteLen; if (fragrecptr.p->slack >= (1u << 31)) { @@ -2460,6 +2463,7 @@ void Dbacc::execACC_LOCKREQ(Signal* signal) 
signal->theData[4] = 1; // fake primKeyLen signal->theData[5] = req->transId1; signal->theData[6] = req->transId2; + // enter local key in place of PK signal->theData[7] = req->tupAddr; EXECUTE_DIRECT(DBACC, GSN_ACCKEYREQ, signal, 8); // translate the result @@ -2484,26 +2488,6 @@ void Dbacc::execACC_LOCKREQ(Signal* signal) *sig = *req; return; } - operationRecPtr.i = req->accOpPtr; - ptrCheckGuard(operationRecPtr, coprecsize, operationrec); - fragrecptr.i = operationRecPtr.p->fragptr; - ptrCheckGuard(fragrecptr, cfragmentsize, fragmentrec); - if (fragrecptr.p->keyLength == 0 && - // should test some state variable - operationRecPtr.p->elementPage != RNIL) { - jam(); - // re-compute long key vars - Page8Ptr tPageptr; - tPageptr.i = operationRecPtr.p->elementPage; - ptrCheckGuard(tPageptr, cpagesize, page8); - Uint32 tKeyptr = - operationRecPtr.p->elementPointer + - operationRecPtr.p->elementIsforward * - (ZELEM_HEAD_SIZE + fragrecptr.p->localkeylen); - tslcPageIndex = tPageptr.p->word32[tKeyptr] & 0x3ff; - tslcPagedir = tPageptr.p->word32[tKeyptr] >> 10; - searchLongKey(signal, false); - } if (lockOp == AccLockReq::Unlock) { jam(); // do unlock via ACC_COMMITREQ (immediate) @@ -2857,14 +2841,6 @@ void Dbacc::insertContainer(Signal* signal) idrPageptr.p->word32[tidrIndex] = clocalkey[tidrInputIndex]; /* INSERTS LOCALKEY */ tidrIndex += tidrForward; }//for - guard26 = tidrKeyLen - 1; - arrGuard(guard26, 8); - for (tidrInputIndex = 0; tidrInputIndex <= guard26; tidrInputIndex++) { - dbgWord32(idrPageptr, tidrIndex, ckeys[tidrInputIndex]); - arrGuard(tidrIndex, 2048); - idrPageptr.p->word32[tidrIndex] = ckeys[tidrInputIndex]; /* INSERTS TUPLE KEY */ - tidrIndex += tidrForward; - }//for tidrContLen = idrPageptr.p->word32[tidrContainerptr] << 6; tidrContLen = tidrContLen >> 6; dbgWord32(idrPageptr, tidrContainerptr, (tidrContainerlen << 26) | tidrContLen); @@ -3198,1215 +3174,6 @@ void Dbacc::seizeRightlist(Signal* signal) increaselistcont(signal); 
}//Dbacc::seizeRightlist() - -//--------------------------------------------------------------------------------- -// ALLOC_SPECIFIC_LONG_OVERFLOW_PAGE -// -// DESCRIPTION: ALLOCATES A LONG OVER FLOW PAGE AND PUTS IT IN A SPECIFIED -// DIRINDEX. THIS IS TO SUPPORT AN UNDO_DELETE AFTER AN -// UNDO_INSERT ON THE SAME LONG KEY IN A LCP. -// UNDO_INSERT ONLY HAVE A REFERENCE TO THE KEY AND TO MAKE -// IT POSSIBLE TO DELETE THE KEY, THE REFERENCE MUST BE -// ACCURATE, WHICH MEANS THE KEY MUST BE SAVED ON THE SAME -// PLACE IT WAS DELETED FROM. -//--------------------------------------------------------------------------------- -void Dbacc::allocSpecificLongOverflowPage(Signal* signal) -{ - DirRangePtr aloDirRangePtr; - DirectoryarrayPtr aloOverflowDirptr; - - if ((cfirstfreepage == RNIL) && - (cfreepage >= cpagesize)) { - jam(); - zpagesize_error("Dbacc::allocSpecificLongOverflowPage"); - tresult = ZPAGESIZE_ERROR; - return; - } - - if ((cfirstfreedir == RNIL) && - (cdirarraysize <= cdirmemory)) { - jam(); - tresult = ZDIRSIZE_ERROR; - return; - } - - tmpP = taslpDirIndex; - aloDirRangePtr.i = fragrecptr.p->overflowdir; - tmpP2 = tmpP >> 8; - tmpP = tmpP & 0xff; - ptrCheckGuard(aloDirRangePtr, cdirrangesize, dirRange); - arrGuard(tmpP2, 256); - - if (aloDirRangePtr.p->dirArray[tmpP2] == RNIL) { - jam(); - seizeDirectory(signal); - if (tresult > ZLIMIT_OF_ERROR) { - jam(); - sendSystemerror(signal); - return; - } - aloDirRangePtr.p->dirArray[tmpP2] = sdDirptr.i; - } else { - jam(); - sdDirptr.i = RNIL; - ptrNull(sdDirptr); - } - - aloOverflowDirptr.i = aloDirRangePtr.p->dirArray[tmpP2]; - ptrCheckGuard(aloOverflowDirptr, cdirarraysize, directoryarray); - seizePage(signal); - if (tresult > ZLIMIT_OF_ERROR) { - jam(); - sendSystemerror(signal); - return; - }//if - - if (aloOverflowDirptr.p->pagep[tmpP] != RNIL) { - jam(); - sendSystemerror(signal); - return; - } - - aloOverflowDirptr.p->pagep[tmpP] = spPageptr.i; - iloPageptr.p = spPageptr.p; - iloPageptr.i = spPageptr.i; 
- tiloIndex = taslpDirIndex; - initLongOverpage(signal); - aslpPageptr.i = spPageptr.i; - aslpPageptr.p = spPageptr.p; -}//Dbacc::allocSpecificLongOverflowPage - -/* --------------------------------------------------------------------------------- */ -/* ALLOC_LONG_OVERFLOW_PAGE */ -/* DESCRIPTION: */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::allocLongOverflowPage(Signal* signal) -{ - DirRangePtr aloDirRangePtr; - DirectoryarrayPtr aloOverflowDirptr; - OverflowRecordPtr aloOverflowRecPtr; - Uint32 taloIndex; - - if ((cfirstfreepage == RNIL) && - (cfreepage >= cpagesize)) { - jam(); - zpagesize_error("Dbacc::allocLongOverflowPage"); - tresult = ZPAGESIZE_ERROR; - return; - }//if - if ((cfirstfreedir == RNIL) && - (cdirarraysize <= cdirmemory)) { - jam(); - tresult = ZDIRSIZE_ERROR; - return; - }//if - if (fragrecptr.p->firstFreeDirindexRec != RNIL) { - jam(); - aloOverflowRecPtr.i = fragrecptr.p->firstFreeDirindexRec; - ptrCheckGuard(aloOverflowRecPtr, coverflowrecsize, overflowRecord); - troOverflowRecPtr.p = aloOverflowRecPtr.p; - takeRecOutOfFreeOverdir(signal); - taloIndex = aloOverflowRecPtr.p->dirindex; - rorOverflowRecPtr = aloOverflowRecPtr; - releaseOverflowRec(signal); - } else { - jam(); - taloIndex = fragrecptr.p->lastOverIndex; - fragrecptr.p->lastOverIndex++; - }//if - tmpP = taloIndex; - aloDirRangePtr.i = fragrecptr.p->overflowdir; - tmpP2 = tmpP >> 8; - tmpP = tmpP & 0xff; - ptrCheckGuard(aloDirRangePtr, cdirrangesize, dirRange); - arrGuard(tmpP2, 256); - if (aloDirRangePtr.p->dirArray[tmpP2] == RNIL) { - jam(); - seizeDirectory(signal); - ndbrequire(tresult <= ZLIMIT_OF_ERROR); - aloDirRangePtr.p->dirArray[tmpP2] = sdDirptr.i; - } else { - jam(); - sdDirptr.i = RNIL; - ptrNull(sdDirptr); - }//if - aloOverflowDirptr.i = aloDirRangePtr.p->dirArray[tmpP2]; - ptrCheckGuard(aloOverflowDirptr, cdirarraysize, directoryarray); - seizePage(signal); - ndbrequire(tresult <= ZLIMIT_OF_ERROR); - 
aloOverflowDirptr.p->pagep[tmpP] = spPageptr.i; - iloPageptr = spPageptr; - tiloIndex = taloIndex; - initLongOverpage(signal); - alpPageptr = spPageptr; - ipaPagePtr = spPageptr; - tipaArrayPos = 3; - insertPageArrayList(signal); -}//Dbacc::allocLongOverflowPage() - -/* --------------------------------------------------------------------------------- */ -/* GET_LONG_KEY_PAGE */ -/* DESCRIPTION: SEARCH FOR A FREE OVERFLOW PAGE TO STORE A LONG KEY. */ -/* LONG_KEY_PAGE_PTR IS RETURNED. */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::getLongKeyPage(Signal* signal) -{ - LongKeyPage *glkPage; - - jam(); - - Uint32 tglkLongIndex = 0; - - ndbrequire(operationRecPtr.p->tupkeylen <= ZWORDS_IN_PAGE - ZHEAD_SIZE); - - // Do not look in longKeyPageArray[tglkLongIndex] where the pages are to small. - if(operationRecPtr.p->tupkeylen < 128) { - jam(); - tglkLongIndex = 0; - } else { - jam(); - tglkLongIndex = (operationRecPtr.p->tupkeylen - 128) / 512; - }//if - - // Go through the longKeyPageArray and search for a page. - for (; tglkLongIndex <= ZMAX_LONG_KEY_ARRAY_INDEX; tglkLongIndex++) { - jam(); - glkPageptr.i = fragrecptr.p->longKeyPageArray[tglkLongIndex]; - - if (glkPageptr.i != RNIL) { - // A page is found. - jam(); - do { - ptrCheckGuard(glkPageptr, cpagesize, page8); - glkPage = (LongKeyPage *) &glkPageptr.p->word32[0]; - - // Check page if there is enough memory available. Accept only page - // with free_area > tupkeylen, this leaves at least one word for eventually - // an increase in the index area. - if (glkPage->header.freeArea > operationRecPtr.p->tupkeylen){ - // The page found is OK - jam(); - return; - } else { - // Not enough space in page, look in the next page if not RNIL, - // otherwise continue with for-loop. - jam(); - glkPageptr.i = glkPage->header.nextPage; - } - }//do - while (glkPageptr.i != RNIL); - }//if - }//for - - // No page with enough space was available, allocate a new page! 
- jam(); - allocLongOverflowPage(signal); - glkPageptr = alpPageptr; -}//Dbacc::getLongKeyPage() - -/* --------------------------------------------------------------------------------- */ -/* INIT_LONG_OVERPAGE */ -/* INPUT. ILO_PAGEPTR, POINTER TO AN OVERFLOW PAGE RECORD */ -/* DESCRIPTION: CONTAINERS AND FREE LISTS OF THE PAGE, GET INITIALE VALUE */ -/* ACCORDING TO LH3 AND PAGE STRUCTOR DISACRIPTION OF NDBACC BLOCK */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::initLongOverpage(Signal* signal) -{ - iloPageptr.p->word32[ZPOS_PAGE_ID] = tiloIndex; - iloPageptr.p->word32[ZPOS_PAGE_TYPE] = ZLONG_PAGE_TYPE << ZPOS_PAGE_TYPE_BIT; - iloPageptr.p->word32[ZPOS_NO_ELEM_IN_PAGE] = 0; - iloPageptr.p->word32[ZPOS_OVERFLOWREC] = RNIL; - iloPageptr.p->word32[ZPOS_FREE_AREA_IN_PAGE] = ZWORDS_IN_PAGE - ZHEAD_SIZE; - iloPageptr.p->word32[ZPOS_LAST_INDEX] = 0; - iloPageptr.p->word32[ZPOS_INSERT_INDEX] = ZHEAD_SIZE; - iloPageptr.p->word32[ZPOS_ARRAY_POS] = ZDEFAULT_LIST; - iloPageptr.p->word32[ZPOS_NEXT_FREE_INDEX] = 0; - iloPageptr.p->word32[ZPOS_NEXT_PAGE] = RNIL; - iloPageptr.p->word32[ZPOS_PREV_PAGE] = RNIL; - iloPageptr.p->word32[12] = 0; - iloPageptr.p->word32[13] = 0; - iloPageptr.p->word32[14] = 0; - iloPageptr.p->word32[15] = 0; - // Initialize free indexes - for (int i = 1; i < (ZWORDS_IN_PAGE - ZHEAD_SIZE); i++) - iloPageptr.p->word32[ZWORDS_IN_PAGE - i] = i + 1; -}//Dbacc::initLongOverpage() - -//--------------------------------------------------------------------------------- -// STORE_LONG_KEYS_AT_POS -// -// INPUT: SLKAP_PAGEPTR -// SLKAP_COPY_PAGEPTR -// TSLKAP_KEY_LEN -// TSLKAP_PAGE_INDEX -// -// DESCRIPTION: A LONG ELEMENT IS STORED ON A LONG_KEY_PAGE AT A -// SPECIFIC POSITION. THIS FUNCTION IS USED BY UNDO_DELETE. 
-//--------------------------------------------------------------------------------- -void Dbacc::storeLongKeysAtPos(Signal* signal) -{ - Uint32 tslkapHighestIndex; - Uint32 tslkapLastSize; - Uint32 tslkapInsertIndex; - Uint32 tslkapIndexIncreaseSize; - Uint32 tslkapTmp; - - LongKeyPage *slkapPage; - - jam(); - slkapPage = (LongKeyPage *) &slkapPageptr.p->word32[0]; - -#ifdef VM_TRACE - checkIndexInLongKeyPage(slkapPageptr.i, "storeLongKeysAtPos"); -#endif - - // if (csystemRestart != ZTRUE) { - if (cundoLogActive != ZTRUE) { - //------------------------------------------------------------- - // This function is only allowed to be called during - // undolog execution. - //------------------------------------------------------------- - jam(); - sendSystemerror(signal); - return; - } - - if (slkapPage->word32[ZWORDS_IN_PAGE - tslkapPageIndex] >> 16 != 0 ) { - //------------------------------------------------------------- - // The index should be empty, we have a serious problem. - //------------------------------------------------------------- - jam(); - sendSystemerror(signal); - return; - } - - //------------------------------------------------------------- - // Calculate some variables to use later. - //------------------------------------------------------------- - tslkapHighestIndex = slkapPage->header.highestIndex; - tslkapPageIndex > tslkapHighestIndex ? - tslkapIndexIncreaseSize = tslkapPageIndex - tslkapHighestIndex : - tslkapIndexIncreaseSize = 0; - - slkapPage->header.highestIndex += tslkapIndexIncreaseSize; - - if ((slkapPage->header.freeArea - tslkapIndexIncreaseSize) - < tslkapKeyLen) { - //------------------------------------------------------------- - // Not enough area in the page, a serious problem. - //------------------------------------------------------------- - jam(); - sendSystemerror(signal); - return; - } - - //------------------------------------------------------------- - // Fix the free index list. 
We might put in a key in the - // middle of the list, so we must fix the free list and the - // free index pointers. - //------------------------------------------------------------- - slkapPage->header.nextFreeIndex = 0; - - for (Uint32 i = tslkapHighestIndex + tslkapIndexIncreaseSize; i > 0; i--) { - if (i == tslkapPageIndex) { - // The key index shall not be in the free list. - continue; - } - - if (slkapPage->word32[ZWORDS_IN_PAGE - i] >> 16 == 0 ) { - // Go through all empty indexes. - slkapPage->word32[ZWORDS_IN_PAGE - i] = slkapPage->header.nextFreeIndex; - arrGuard(i, 2048); - slkapPage->header.nextFreeIndex = i; - } - } - - //------------------------------------------------------------- - // Decrement the free area in page according to the above - // increase in index size. - //------------------------------------------------------------- - slkapPage->header.freeArea -= tslkapIndexIncreaseSize; - - tslkapLastSize = ZWORDS_IN_PAGE - slkapPage->header.highestIndex - - slkapPage->header.insertPos; - - //------------------------------------------------------------- - // Check if we have to reorganize the page. - //------------------------------------------------------------- - if (tslkapLastSize >= tslkapKeyLen) { - jam(); - } else { - jam(); - relpPageptr.p = slkapPageptr.p; - reorgLongPage(signal); - } - - //------------------------------------------------------------- - // Insert the key and update page attributes. - //------------------------------------------------------------- - jam(); - // Increase the number of element in the page. - slkapPage->header.noOfElements++; - jam(); - // Put in the key reference into the index. The reference - // consists of key length and insert position. - arrGuard(ZWORDS_IN_PAGE - tslkapPageIndex, 2048); - slkapPage->word32[ZWORDS_IN_PAGE - tslkapPageIndex] = - slkapPage->header.insertPos | (tslkapKeyLen << 16); - jam(); - // Increase the key insert position. 
- tslkapInsertIndex = slkapPage->header.insertPos; - slkapPage->header.insertPos += tslkapKeyLen; - jam(); - // Decrease the free area. - slkapPage->header.freeArea -= tslkapKeyLen; - jam(); - - // Update pageArrayPos. insertPageArrayList() called from execACC_OVER_REC - // needs this value. - if (slkapPage->header.freeArea < 128) { - jam(); - slkapPage->header.pageArrayPos = 4; - } else { - jam(); - slkapPage->header.pageArrayPos = (slkapPage->header.freeArea - 128) / 512; - }//if - - // Store the actual key at the insert position. - Uint32 guard27 = tslkapKeyLen - 1; - arrGuard(guard27 + tslkapInsertIndex, 2048); - for (tslkapTmp = 0; tslkapTmp <= guard27; tslkapTmp++) { - jam(); - slkapPage->word32[tslkapTmp + tslkapInsertIndex] = slkapCopyPageptr.p->word32[tslkapTmp]; - }//for -}//Dbacc::storeLongKeysAtPos - -/* --------------------------------------------------------------------------------- */ -/* STORE_LONG_KEYS */ -/* INPUT: SLK_PAGEPTR */ -/* SLK_COPY_PAGEPTR */ -/* TSLK_KEY_LEN */ -/* OUTPUT: TSLK_PAGE_INDEX */ -/* */ -/* DESCRIPTION: A LONG ELEMENT IS STORED ON A LONG_KEY_PAGE. */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::storeLongKeys(Signal* signal) -{ - Uint32 tslkLastSize; - Uint32 tslkInsertIndex; - Uint32 tslkArrayPos; - Uint32 tslkTmp; - Uint32 guard27; - LongKeyPage *slkPage; - - jam(); - slkPage = (LongKeyPage *) &slkPageptr.p->word32[0]; - -#ifdef VM_TRACE - checkIndexInLongKeyPage(slkPageptr.i, "storeLongKeys1"); -#endif - - // Accept only page with free_area > tupkeylen, this leaves at least - // one word for eventually an increase in the index area. 
- ndbrequire(slkPage->header.freeArea > tslkKeyLen); - - dbgWord32(slkPageptr, ZPOS_LAST_INDEX, slkPage->header.highestIndex); - dbgWord32(slkPageptr, ZPOS_INSERT_INDEX, slkPage->header.insertPos); - - tslkLastSize = ZWORDS_IN_PAGE - slkPage->header.highestIndex - slkPage->header.insertPos; - - if (tslkLastSize > operationRecPtr.p->tupkeylen) { - // WE DO NOT NEED TO REORGANIZE THE PAGE TO INSERT THE NEW KEY. IT FITS INTO THE - // SIZE REMAINING AT THE END. - jam(); - } else { - // THE KEY FITS INTO THE PAGE BUT ONLY AFTER REORGANISING THE PAGE. - jam(); - relpPageptr.p = slkPageptr.p; - reorgLongPage(signal); - }//if - - if (slkPage->header.nextFreeIndex == 0) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THE PAGE INDEX HAS NO EMPTY SLOTS. WE MUST EXTEND THE PAGE INDEX BY ONE NEW SLOT.*/ - /* --------------------------------------------------------------------------------- */ - tslkPageIndex = slkPage->header.highestIndex + 1; - } else { - jam(); - tslkPageIndex = slkPage->header.nextFreeIndex; - }//if - - if (fragrecptr.p->createLcp == ZTRUE) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* ON LONG PAGES WE USE A PHYSIOLOGICAL LOGGING SCHEME. THIS MEANS THAT WE ONLY NEED*/ - /* TO SPECIFY WHICH INDEX TO DELETE IN ORDER TO UNDO THE CHANGES WE DO. THE */ - /* POSSIBLE REORGANISATION DO NOT CHANGE THE LOGICAL LAYOUT OF THE PAGE. */ - /* --------------------------------------------------------------------------------- */ - datapageptr.p = slkPageptr.p; - cundoElemIndex = tslkPageIndex; - cundoinfolength = 0; - undoWritingProcess(signal); - }//if - - if (slkPage->header.nextFreeIndex == 0) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THE PAGE INDEX HAS NO EMPTY SLOTS. 
WE MUST EXTEND THE PAGE INDEX BY ONE NEW SLOT.*/ - /* --------------------------------------------------------------------------------- */ - dbgWord32(slkPageptr, ZPOS_LAST_INDEX, slkPage->header.highestIndex + 1); - slkPage->header.highestIndex++; - ndbrequire(slkPage->header.insertPos < (ZWORDS_IN_PAGE - slkPage->header.highestIndex)); - // Reset index. We have already checked that we can increase "highestIndex" value - // without overwriting the data part. - slkPage->word32[ZWORDS_IN_PAGE - slkPage->header.highestIndex] = 0; - dbgWord32(slkPageptr, ZPOS_FREE_AREA_IN_PAGE, slkPage->header.freeArea - 1); - slkPage->header.freeArea--; - } else { - jam(); - dbgWord32(slkPageptr, ZPOS_NEXT_FREE_INDEX, slkPage->word32[ZWORDS_IN_PAGE - tslkPageIndex]); - arrGuard(ZWORDS_IN_PAGE - tslkPageIndex, 2048); - arrGuard(slkPage->word32[ZWORDS_IN_PAGE - tslkPageIndex], 2048); - - slkPage->header.nextFreeIndex = slkPage->word32[ZWORDS_IN_PAGE - tslkPageIndex]; - if(slkPage->header.nextFreeIndex > slkPage->header.highestIndex){ - slkPage->header.nextFreeIndex = 0; - dbgWord32(slkPageptr, ZPOS_NEXT_FREE_INDEX, slkPage->header.nextFreeIndex); - } - }//if - - dbgWord32(slkPageptr, ZWORDS_IN_PAGE - tslkPageIndex, tslkKeyLen); - dbgWord32(slkPageptr, ZWORDS_IN_PAGE - tslkPageIndex, slkPage->header.insertPos); - arrGuard(ZWORDS_IN_PAGE - tslkPageIndex, 2048); - slkPage->word32[ZWORDS_IN_PAGE - tslkPageIndex] = - slkPage->header.insertPos | (tslkKeyLen << 16); - - dbgWord32(slkPageptr, ZPOS_INSERT_INDEX, slkPage->header.insertPos); - tslkInsertIndex = slkPage->header.insertPos; - slkPage->header.insertPos += tslkKeyLen; - - dbgWord32(slkPageptr, ZPOS_FREE_AREA_IN_PAGE, slkPage->header.freeArea - tslkKeyLen); - slkPage->header.freeArea = slkPage->header.freeArea - tslkKeyLen; - if (slkPage->header.freeArea < 128) { - jam(); - tslkArrayPos = 4; - } else { - jam(); - tslkArrayPos = (slkPage->header.freeArea - 128) / 512; - }//if - - if (tslkArrayPos != slkPage->header.pageArrayPos) { - 
jam(); - if (cundoLogActive != ZTRUE) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* WE ONLY HANDLE THE LISTS WHEN WE ARE NOT IN A SYSTEM RESTART. */ - /* --------------------------------------------------------------------------------- */ - rfpPageptr = slkPageptr; - trfpArrayPos = slkPage->header.pageArrayPos; - removeFromPageArrayList(signal); - ipaPagePtr = slkPageptr; - tipaArrayPos = tslkArrayPos; - slkPage->header.pageArrayPos = tipaArrayPos; - if (tslkArrayPos != 4) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THE PAGE WILL STILL BE ON ONE OF THE FREE LISTS SINCE AT LEAST 128 * 4 */ - /* BYTES OF FREE SPACE REMAINS ON THE PAGE. */ - /* --------------------------------------------------------------------------------- */ - insertPageArrayList(signal); - }//if - } else { - // This should never happen. Should use storeLongKeysAtPos() instead when executing - // undolog. - ndbrequire(false); - } - }//if - /* --------------------------------------------------------------------------------- */ - /* INCREASE THE NUMBER OF ELEMENTS IN THE PAGE. */ - /* --------------------------------------------------------------------------------- */ - dbgWord32(slkPageptr, ZPOS_NO_ELEM_IN_PAGE, slkPage->header.noOfElements + 1); - slkPage->header.noOfElements++; - - guard27 = tslkKeyLen - 1; - arrGuard(guard27 + tslkInsertIndex, 2048); - for (tslkTmp = 0; tslkTmp <= guard27; tslkTmp++) { - dbgWord32(slkPageptr, tslkTmp + tslkInsertIndex, slkCopyPageptr.p->word32[tslkTmp]); - slkPage->word32[tslkTmp + tslkInsertIndex] = slkCopyPageptr.p->word32[tslkTmp]; - }//for - - // Used by abortoperation() in case of an abort. - operationRecPtr.p->longPagePtr = slkPageptr.i; - - // This is for an eventual LCP start in the middle of this locked operation. 
- operationRecPtr.p->longKeyPageIndex = tslkPageIndex; - -#ifdef VM_TRACE - if (cundoLogActive != ZTRUE) checkPageArrayList(signal, "storeLongKeys"); - checkIndexInLongKeyPage(slkPageptr.i, "storeLongKeys2"); -#endif - -}//Dbacc::storeLongKeys() - -/* --------------------------------------------------------------------------------- */ -/* REORGANIZE THE PAGE BY COPYING IT TEMPORARILY TO A NEW AREA AND THEN SIMPLY */ -/* PUTTING THE OBJECTS BACK ON THE PAGE IN THE SAME ORDER AS THEY ARE PLACED IN THE */ -/* INDEX. */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::reorgLongPage(Signal* signal) -{ - Uint32 indexStartPos; - Uint32 pagePos; - Uint32 pagePos2; - Uint32 indexNo; - Uint32 insertPos; - Uint32 indexValue; - Uint32 keyLength; - Uint32 keyPos; - Uint32 keyEndPos; - LongKeyPage *reOrgPage; - - ptrGuard(relpPageptr); - reOrgPage = (LongKeyPage *) &relpPageptr.p->word32[0]; - - dbgWord32(relpPageptr, ZPOS_LAST_INDEX, reOrgPage->header.highestIndex); - indexStartPos = ZWORDS_IN_PAGE - reOrgPage->header.highestIndex; - - // Copy key data part of page to a temporary page. - for (pagePos = ZHEAD_SIZE; pagePos < indexStartPos; pagePos++) { - jam(); - arrGuard(pagePos, 2048); - ckeys[pagePos] = reOrgPage->word32[pagePos]; - }//for - - insertPos = ZHEAD_SIZE; - - // Walk through all the indexes. - for (indexNo = 1; indexNo <= reOrgPage->header.highestIndex; indexNo++) { - jam(); - arrGuard(ZWORDS_IN_PAGE - indexNo, 2048); - dbgWord32(relpPageptr, ZWORDS_IN_PAGE - indexNo, reOrgPage->word32[ZWORDS_IN_PAGE - indexNo]); - indexValue = reOrgPage->word32[ZWORDS_IN_PAGE - indexNo]; - - if ((indexValue >> 16) != 0) { - // The index contains a reference to a key. 
- jam(); - keyPos = indexValue & 0xffff; - keyLength = indexValue >> 16; - dbgWord32(relpPageptr, ZWORDS_IN_PAGE - indexNo, insertPos + (keyLength << 16)); - arrGuard(ZWORDS_IN_PAGE - indexNo, 2048); - - // Refresh the index data with the new key start position in the data part. - reOrgPage->word32[ZWORDS_IN_PAGE - indexNo] = insertPos + (keyLength << 16); - keyEndPos = keyPos + keyLength; - arrGuard(keyEndPos, 2048); - - // Copy the key from the temporary page - // to the insert position at original page. - for (pagePos2 = keyPos; pagePos2 < keyEndPos; pagePos2++, insertPos++) { - jam(); - dbgWord32(relpPageptr, insertPos, ckeys[pagePos2]); - arrGuard(insertPos, 2048); - arrGuard(pagePos2, 2048); - reOrgPage->word32[insertPos] = ckeys[pagePos2]; - }//for - }//if - }//for - dbgWord32(relpPageptr, ZPOS_INSERT_INDEX, insertPos); - reOrgPage->header.insertPos = insertPos; -}//Dbacc::reorgLongPage() - - -/* --------------------------------------------------------------------------------- */ -/* DELETE_LONG_KEY */ -/* INPUT: DLK_PAGEPTR PAGE POINTER OF DELETED KEY OBJECT */ -/* TDLK_LOGICAL_PAGE_INDEX LOGICAL PAGE INDEX OF DELETED KEY OBJECT */ -/* */ -/* DESCRIPTION: DELETE AN ELEMENT OF A LONG_KEY_PAGE. 
*/ -/* --------------------------------------------------------------------------------- */ -void Dbacc::deleteLongKey(Signal* signal) -{ - Uint32 tdlkLastIndex; - Uint32 tdlkNextPosition; - Uint32 tdlkFreeArea; - Uint32 tdlkArrayPos; - Uint32 tdlkOldArrayPos; - LongKeyPage *dlkPage; - - jam(); - dlkPage = (LongKeyPage *) &dlkPageptr.p->word32[0]; - -#ifdef VM_TRACE - checkIndexInLongKeyPage(dlkPageptr.i, "deleteLongKey1"); -#endif - - dbgWord32(dlkPageptr, ZWORDS_IN_PAGE - tdlkLogicalPageIndex, dlkPage->word32[ZWORDS_IN_PAGE - tdlkLogicalPageIndex] >> 16); - dbgWord32(dlkPageptr, ZWORDS_IN_PAGE - tdlkLogicalPageIndex, dlkPage->word32[ZWORDS_IN_PAGE - tdlkLogicalPageIndex] & 0xffff); - arrGuard(ZWORDS_IN_PAGE - tdlkLogicalPageIndex, 2048); - - const Uint32 tdlkIndexValue = dlkPage->word32[ZWORDS_IN_PAGE - tdlkLogicalPageIndex]; - const Uint32 tdlkKeyLen = tdlkIndexValue >> 16; - const Uint32 tdlkPhysPageIndex = tdlkIndexValue & 0xffff; - - if (fragrecptr.p->createLcp == ZTRUE) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* WE LOG THE DELETE LONG KEY BY LOGGING THE DELETED KEY AND ITS LOGICAL INDEX.*/ - /* --------------------------------------------------------------------------------- */ - datapageptr.p = dlkPageptr.p; - cundoElemIndex = tdlkLogicalPageIndex; - cundoinfolength = tdlkKeyLen; - undoWritingProcess(signal); - }//if - /* --------------------------------------------------------------------------------- */ - /* DECREASE THE NUMBER OF ELEMENTS IN THE PAGE. */ - /* --------------------------------------------------------------------------------- */ - dbgWord32(dlkPageptr, ZPOS_NO_ELEM_IN_PAGE, dlkPage->header.noOfElements - 1); - dlkPage->header.noOfElements--; - - arrGuard(dlkPage->header.noOfElements, ZMAX_NO_OF_LONGKEYS_IN_PAGE); - - /* --------------------------------------------------------------------------------- */ - /* INCREASE THE FREE AREA IN THE PAGE. 
*/ - /* --------------------------------------------------------------------------------- */ - dbgWord32(dlkPageptr, ZPOS_FREE_AREA_IN_PAGE, dlkPage->header.freeArea + tdlkKeyLen); - dbgWord32(dlkPageptr, ZPOS_LAST_INDEX, dlkPage->header.highestIndex); - - dlkPage->header.freeArea += tdlkKeyLen; - - if (dlkPage->header.noOfElements == 0) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THE PAGE IS NOW EMPTY, WE CAN RELEASE IT. */ - /* --------------------------------------------------------------------------------- */ - if (dlkPage->header.freeArea != - (ZWORDS_IN_PAGE - ZHEAD_SIZE - dlkPage->header.highestIndex )) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* SOME AREA IN THE PAGE IS STILL LEFT BUT NO ELEMENTS, INCONSISTENT */ - /* --------------------------------------------------------------------------------- */ - sendSystemerror(signal); - }//if - /* --------------------------------------------------------------------------------- */ - /* WE REMOVE THE PAGE FROM THE LIST OF FREE LONG PAGES. THERE IS NO RISK THAT IT */ - /* DID NOT BELONG TO ANY SINCE IT IS NOT ALLOWED TO HAVE THAT LARGE KEYS. */ - /* --------------------------------------------------------------------------------- */ - - if (cundoLogActive != ZTRUE) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* WHEN DELETING KEYS DURING SYSTEM RESTART WE NEED NOT UPDATE THE LISTS. */ - /* --------------------------------------------------------------------------------- */ - // REMOVEFROMLIST is done by releaseLongPage(). EDTJAMO. - // rfpPageptr = dlkPageptr; - // trfpArrayPos = dlkPage->header.pageArrayPos; - // removeFromPageArrayList(signal, "deleteLongKey"); - rlopPageptr = dlkPageptr; - releaseLongPage(signal); - return; - } else { - // Must remove reference to the removed key, otherwise left in index. EDTJAMO. 
- arrGuard(ZWORDS_IN_PAGE - tdlkLogicalPageIndex, 2048); - arrGuard(tdlkLogicalPageIndex, 2048); - - tdlkNextPosition = dlkPage->header.nextFreeIndex; - dlkPage->header.nextFreeIndex = tdlkLogicalPageIndex; - dbgWord32(dlkPageptr, ZWORDS_IN_PAGE - tdlkLogicalPageIndex, tdlkNextPosition); - dlkPage->word32[ZWORDS_IN_PAGE - tdlkLogicalPageIndex] = tdlkNextPosition; - } - } else { - /* --------------------------------------------------------------------------------- */ - /* THE PAGE IS NOT EMPTY SO WE WILL REMOVE THE KEY OBJECT AND UPDATE THE */ - /* HEADER INFORMATION AND PLACE THE PAGE IN THE PROPER PAGE LIST. */ - /* --------------------------------------------------------------------------------- */ - tdlkLastIndex = dlkPage->header.highestIndex; - arrGuard(ZWORDS_IN_PAGE - tdlkLastIndex, 2048); - if (tdlkLastIndex == tdlkLogicalPageIndex) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* WE DELETE THE LAST PAGE INDEX SO WE NEED TO UPDATE THE VALUE. WE MOVE */ - /* BACKWARDS UNTIL WE EITHER FIND A USED INDEX OR THAT WE COME TO INDEX ZERO. */ - /* --------------------------------------------------------------------------------- */ - tdlkLastIndex--; - while( (tdlkLastIndex > 1) && - (dlkPage->word32[ZWORDS_IN_PAGE - tdlkLastIndex] >> 16) == 0 ) { - jam(); - tdlkLastIndex--; - } - //----------------------------------------------------- - // Reorganize the rest of the index. Set up the free - // list and the free index. - //----------------------------------------------------- - UintR dlkTmp = tdlkLastIndex; - dlkPage->header.nextFreeIndex = 0; - while( dlkTmp > 0) { - if ( (dlkPage->word32[ZWORDS_IN_PAGE - dlkTmp] >> 16) == 0 ) { - jam(); - dlkPage->word32[ZWORDS_IN_PAGE - dlkTmp] = dlkPage->header.nextFreeIndex; - arrGuard(dlkTmp, 2048); - dlkPage->header.nextFreeIndex = dlkTmp; - } - dlkTmp--; - } - //----------------------------------------------------- - // Update free area in page and last index. 
- //----------------------------------------------------- - dbgWord32(dlkPageptr, ZPOS_LAST_INDEX, tdlkLastIndex); - dlkPage->header.highestIndex = tdlkLastIndex; - dlkPage->header.freeArea = tdlkLogicalPageIndex + - dlkPage->header.freeArea - tdlkLastIndex; - tdlkNextPosition = 0; - } else { - if (dlkPage->header.highestIndex > tdlkLogicalPageIndex) { - jam(); - tdlkNextPosition = dlkPage->header.nextFreeIndex; - dbgWord32(dlkPageptr, ZPOS_NEXT_FREE_INDEX, tdlkLogicalPageIndex); - arrGuard(tdlkLogicalPageIndex, 2048); - dlkPage->header.nextFreeIndex = tdlkLogicalPageIndex; - } else { - jam(); - /* --------------------------------------------------------------------------------- */ - /* LOGICAL PAGE INDEX LARGER THAN LARGEST INDEX, INCONSISTENT. */ - /* --------------------------------------------------------------------------------- */ - sendSystemerror(signal); - return; // Just to keep compiler happy - }//if - }//if - /* --------------------------------------------------------------------------------- */ - /* WE INSERT ZERO INTO THE LENGTH PART TO INDICATE A FREE INDEX POSITION. */ - /* WE INSERT A POINTER TO THE NEXT FREE INDEX TO AS TO PUT IT INTO A FREE */ - /* LIST OF INDEX POSITIONS. WE ONLY DO SO IF IT WAS NOT THE LAST INDEX. 
*/ - /* --------------------------------------------------------------------------------- */ - dbgWord32(dlkPageptr, ZWORDS_IN_PAGE - tdlkLogicalPageIndex, tdlkNextPosition); - arrGuard(ZWORDS_IN_PAGE - tdlkLogicalPageIndex, 2048); - dlkPage->word32[ZWORDS_IN_PAGE - tdlkLogicalPageIndex] = tdlkNextPosition; - if (dlkPage->header.insertPos == (tdlkPhysPageIndex + tdlkKeyLen)) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THIS ENTRY IS THE LAST ON THE PAGE SO WE WILL UPDATE THE INSERT INDEX */ - /* --------------------------------------------------------------------------------- */ - dbgWord32(dlkPageptr, ZPOS_INSERT_INDEX, tdlkPhysPageIndex); - dlkPage->header.insertPos = tdlkPhysPageIndex; - }//if - }//if - dbgWord32(dlkPageptr, ZPOS_FREE_AREA_IN_PAGE, dlkPage->header.freeArea); - tdlkFreeArea = dlkPage->header.freeArea; - ndbrequire(tdlkFreeArea <= (ZWORDS_IN_PAGE - ZHEAD_SIZE)); - if (tdlkFreeArea < 128) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* FREE AREA IS STILL LESS THAN 128 WORDS SO IT SHOULD NOT BE PLACED IN ANY OF THE */ - /* FREE LISTS. */ - /* --------------------------------------------------------------------------------- */ - dbgWord32(dlkPageptr, ZPOS_ARRAY_POS, dlkPage->header.pageArrayPos); - ndbrequire(dlkPage->header.pageArrayPos == 4); - } else { - jam(); - // Calculate an eventually new arraypos. - dbgWord32(dlkPageptr, 0, (tdlkFreeArea - 128) / 512); - tdlkArrayPos = (tdlkFreeArea - 128) / 512; - - if (cundoLogActive != ZTRUE) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* WHEN DELETING KEYS DURING SYSTEM RESTART WE NEED NOT UPDATE THE LISTS. 
*/ - /* --------------------------------------------------------------------------------- */ - dbgWord32(dlkPageptr, ZPOS_ARRAY_POS, dlkPage->header.pageArrayPos); - tdlkOldArrayPos = dlkPage->header.pageArrayPos; - if (tdlkArrayPos != tdlkOldArrayPos) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THE NEW MEMORY AREA HAS ENABLED THE PAGE TO MOVE TO A NEW FREE PAGE LIST */ - /* --------------------------------------------------------------------------------- */ - rfpPageptr = dlkPageptr; - trfpArrayPos = tdlkOldArrayPos; - if (tdlkOldArrayPos != 4) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THERE WAS A FREE PAGE LIST TO REMOVE THE PAGE FROM. IF FREE SPACE IS LESS THAN */ - /* 128 BYTES THEN IT IS NOT ON ANY FREE LIST. */ - /* --------------------------------------------------------------------------------- */ - removeFromPageArrayList(signal); - }//if - dlkPage->header.pageArrayPos = tdlkArrayPos; - ipaPagePtr = dlkPageptr; - tipaArrayPos = tdlkArrayPos; - insertPageArrayList(signal); - }//if - } else { - // Update pageArrayPos. We are in a SR, executing undolog, insertPageArrayList() called - // from execACC_OVER_REC needs this value later. - dlkPage->header.pageArrayPos = tdlkArrayPos; - } - }//if -#ifdef VM_TRACE - if (cundoLogActive != ZTRUE) checkPageArrayList(signal, "deleteLongKey"); - checkIndexInLongKeyPage(dlkPageptr.i, "deleteLongKey2"); -#endif -}//Dbacc::deleteLongKey() - - -void Dbacc::checkIndexInLongKeyPage(Uint32 pageId, const char *calledFrom) { - Page8Ptr pagePtr; - LongKeyPage *page; - Uint32 indexNo; - Uint32 indexValue; - Uint32 keyLength; - Uint32 keyPos; - - pagePtr.i = pageId; - ptrCheckGuard(pagePtr, cpagesize, page8); - page = (LongKeyPage *) &pagePtr.p->word32[0]; - - // Check the header variables. 
- if (page->header.nextFreeIndex > 2048 || - page->header.highestIndex > 2048 || - page->header.insertPos > 2048 || - page->header.freeArea > 2048 || - page->header.noOfElements > 225) { - ndbout << " ERROR in checkIndexInLongKeyPage, called from " << calledFrom << endl - << " pagePtr.i = " << pageId << endl; - printoutInfoAndShutdown(page); - } - - // Walk through all the indexes. - for (indexNo = 1; indexNo <= page->header.highestIndex; indexNo++) { - jam(); - indexValue = page->word32[ZWORDS_IN_PAGE - indexNo]; - - if ((indexValue >> 16) == 0) { - ; // key length is 0, means no key reference at this position in index. - } else { - // The index contains a reference to a key. - jam(); - keyPos = indexValue & 0xffff; - keyLength = indexValue >> 16; - if (keyPos >= ZWORDS_IN_PAGE || keyLength >= ZWORDS_IN_PAGE) { - jam(); - ndbout << " ERROR in checkIndexInLongKeyPage, called from " << calledFrom << endl - << " keyPos = " << keyPos << endl - << " keyLength = " << keyLength << endl - << " page->header.noOfElements = " << page->header.noOfElements << endl - << " page->header.freeArea = " << page->header.freeArea << endl - << " indexNo = " << indexNo << endl - << " page->header.highestIndex = " << page->header.highestIndex << endl; - ndbrequire(false); - } - } - } -}//Dbacc::checkIndexInLongKeyPage - - -/* --------------------------------------------------------------------------------- */ -/* REMOVE A PAGE FROM THE PAGE ARRAY LIST. 
*/ -/* --------------------------------------------------------------------------------- */ -void Dbacc::removeFromPageArrayList(Signal* signal) -{ - Page8Ptr rfpPrevPageptr; - Page8Ptr rfpNextPageptr; - LongKeyPage *page; - LongKeyPage *prevPage; - LongKeyPage *nextPage; - - jam(); - -#ifdef VM_TRACE - checkPageB4Remove(rfpPageptr.i, "removeFromPageArrayList"); -#endif - - page = (LongKeyPage *) &rfpPageptr.p->word32[0]; - - if (page->header.prevPage == RNIL) { - jam(); - arrGuard(trfpArrayPos, 4); - // This page was first in list, remove reference - // to this page from the start of the list. - ndbrequire(fragrecptr.p->longKeyPageArray[trfpArrayPos] == rfpPageptr.i); - fragrecptr.p->longKeyPageArray[trfpArrayPos] = page->header.nextPage; - } else { - jam(); - rfpPrevPageptr.i = page->header.prevPage; - ptrCheckGuard(rfpPrevPageptr, cpagesize, page8); - prevPage = (LongKeyPage *) &rfpPrevPageptr.p->word32[0]; - // This page wasn't first in list, remove reference - // to this page from the previous page. - ndbrequire(prevPage->header.nextPage == rfpPageptr.i); - prevPage->header.nextPage = page->header.nextPage; - }//if - - if (page->header.nextPage != RNIL) { - jam(); - rfpNextPageptr.i = page->header.nextPage; - ptrCheckGuard(rfpNextPageptr, cpagesize, page8); - nextPage = (LongKeyPage *) &rfpNextPageptr.p->word32[0]; - // This page wasn't last in list, remove reference - // to this page from the next page. - ndbrequire(nextPage->header.prevPage == rfpPageptr.i); - nextPage->header.prevPage = page->header.prevPage; - // Remove reference to next page in list. - page->header.nextPage = RNIL; - }//if - - // This couldn't be set until now. - // Remove reference to previous page in list. 
- page->header.prevPage = RNIL; - -#ifdef VM_TRACE - checkPageArrayList(signal, "removeFromPageArrayList"); -#endif -}//Dbacc::removeFromPageArrayList() - -/* --------------------------------------------------------------------------------- */ -/* INSERT A PAGE INTO THE PAGE ARRAY LIST. */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::insertPageArrayList(Signal* signal) -{ - Page8Ptr ipaNextPagePtr; - LongKeyPage *page; - LongKeyPage *nextPage; - - jam(); - -#ifdef VM_TRACE - checkPageArrayList(signal, "insertPageArrayList1"); - checkPageB4Insert(ipaPagePtr.i, "insertPageArrayList1"); -#endif - - page = (LongKeyPage *) &ipaPagePtr.p->word32[0]; - - arrGuard(tipaArrayPos, 4); - - if (fragrecptr.p->longKeyPageArray[tipaArrayPos] != RNIL) { - jam(); - ipaNextPagePtr.i = fragrecptr.p->longKeyPageArray[tipaArrayPos]; - ptrCheckGuard(ipaNextPagePtr, cpagesize, page8); - nextPage = (LongKeyPage *) &ipaNextPagePtr.p->word32[0]; - - // A page already existed in the list, add reference - // to this page in the next page. - nextPage->header.prevPage = ipaPagePtr.i; - }//if - - page->header.prevPage = RNIL; - page->header.nextPage = fragrecptr.p->longKeyPageArray[tipaArrayPos]; - page->header.pageArrayPos = tipaArrayPos; - - fragrecptr.p->longKeyPageArray[tipaArrayPos] = ipaPagePtr.i; - -#ifdef VM_TRACE - checkPageArrayList(signal, "insertPageArrayList2"); -#endif -}//Dbacc::insertPageArrayList() - -// --------------------------------------------------------------------------------- */ -// Check the page array list. -// --------------------------------------------------------------------------------- */ -void Dbacc::checkPageArrayList(Signal* signal, const char *calledFrom) -{ - Page8Ptr pagePtr; - Uint32 pageArrayIndex; - LongKeyPage *page; - Uint32 prevPage; - - // Go through the longKeyPageArray and search for a page. 
- for (pageArrayIndex = 0; pageArrayIndex <= ZMAX_LONG_KEY_ARRAY_INDEX; pageArrayIndex++) { - jam(); - pagePtr.i = fragrecptr.p->longKeyPageArray[pageArrayIndex]; - prevPage = RNIL; - - if (pagePtr.i != RNIL) { - // A page is found. - jam(); - do { - ptrCheckGuard(pagePtr, cpagesize, page8); - page = (LongKeyPage *) &pagePtr.p->word32[0]; - - if ((page->header.freeArea >= 128) && - (((page->header.freeArea - 128) / 512) == page->header.pageArrayPos) && - (pageArrayIndex == page->header.pageArrayPos) && - (page->header.prevPage == prevPage)) { - // The page found is OK, test next page. - prevPage = pagePtr.i; - pagePtr.i = page->header.nextPage; - jam(); - } else { - jam(); - ndbout << " ERROR in checkPageArrayList, called from " << calledFrom << endl - << " pagePtr.i = " << pagePtr.i << endl - << " prevPage = " << prevPage << endl - << " pageArrayIndex = " << pageArrayIndex << endl; - printoutInfoAndShutdown(page); - } - }//do - while (pagePtr.i != RNIL); - }//if - }//for -}//Dbacc::checkPageArrayList() - -// --------------------------------------------------------------------------------- */ -// Check the page to put into the pageArrayList. -// --------------------------------------------------------------------------------- */ -void Dbacc::checkPageB4Insert(Uint32 pageId, const char *calledFrom) { - Page8Ptr pagePtr; - Uint32 pageArrayIndex; - LongKeyPage *page; - - pagePtr.i = pageId; - ptrCheckGuard(pagePtr, cpagesize, page8); - page = (LongKeyPage *) &pagePtr.p->word32[0]; - - if ((page->header.nextPage != RNIL) || - (page->header.prevPage != RNIL)) { - jam(); - ndbout << " ERROR in checkPageB4Insert, called from " << calledFrom << endl - << " pagePtr.i = " << pagePtr.i << endl - << " page->header.nextPage = " << page->header.nextPage << endl - << " page->header.prevPage = " << page->header.prevPage << endl; - ndbrequire(false); - } - - // Page should not be inserted in list if free area is less than 512 byte. 
- if (page->header.freeArea < 128) { - jam(); - ndbout << " ERROR in checkPageB4Insert, called from " << calledFrom << endl - << " Page has to little free area to be in list." << endl - << " pagePtr.i = " << pagePtr.i << endl - << " tipaArrayPos = " << tipaArrayPos << endl; - printoutInfoAndShutdown(page); - } - - // Check if position in list is correct - if ((((page->header.freeArea - 128) / 512) != page->header.pageArrayPos) || - (page->header.pageArrayPos != tipaArrayPos)) { - ndbout << " ERROR in checkPageB4Insert, called from " << calledFrom << endl - << " Incorrect position in list." << endl - << " pagePtr.i = " << pagePtr.i << endl - << " tipaArrayPos = " << tipaArrayPos << endl; - printoutInfoAndShutdown(page); - } - - // Check if page is already in list. - for (pageArrayIndex = 0; pageArrayIndex <= ZMAX_LONG_KEY_ARRAY_INDEX; pageArrayIndex++) { - jam(); - pagePtr.i = fragrecptr.p->longKeyPageArray[pageArrayIndex]; - - if (pagePtr.i != RNIL) { - // A page is found. - jam(); - do { - ptrCheckGuard(pagePtr, cpagesize, page8); - page = (LongKeyPage *) &pagePtr.p->word32[0]; - if (pagePtr.i == pageId) { - jam(); - ndbout << "ERROR in checkPageB4Insert, called from " << calledFrom << endl - << "Page exists already in list." << endl - << " pagePtr.i = " << pagePtr.i << endl; - printoutInfoAndShutdown(page); - } - pagePtr.i = page->header.nextPage; - }//do - while (pagePtr.i != RNIL); - }//if - }//for -}//Dbacc::checkPageB4Insert() - -// --------------------------------------------------------------------------------- */ -// Check the page to remove from the pageArrayList. 
-// --------------------------------------------------------------------------------- */ -void Dbacc::checkPageB4Remove(Uint32 pageId, const char *calledFrom) { - Page8Ptr pagePtr; - Uint32 pageArrayIndex; - Uint32 noOfOccurrence = 0; - Uint32 noOfPagesInList = 0; - LongKeyPage *page; - - LongKeyPage *prevPage; - LongKeyPage *nextPage; - Page8Ptr rfpPrevPageptr; - Page8Ptr rfpNextPageptr; - - - pagePtr.i = pageId; - ptrCheckGuard(pagePtr, cpagesize, page8); - page = (LongKeyPage *) &pagePtr.p->word32[0]; - - // Check that page is in list. - for (pageArrayIndex = 0; pageArrayIndex <= ZMAX_LONG_KEY_ARRAY_INDEX; pageArrayIndex++) { - jam(); - pagePtr.i = fragrecptr.p->longKeyPageArray[pageArrayIndex]; - - if (pagePtr.i != RNIL) { - // A page is found. - jam(); - do { - noOfPagesInList++; - ptrCheckGuard(pagePtr, cpagesize, page8); - page = (LongKeyPage *) &pagePtr.p->word32[0]; - if (pagePtr.i == pageId) { - // Check the consistent in list. - if (page->header.prevPage != RNIL) { - rfpPrevPageptr.i = page->header.prevPage; - ptrCheckGuard(rfpPrevPageptr, cpagesize, page8); - prevPage = (LongKeyPage *) &rfpPrevPageptr.p->word32[0]; - if (prevPage->header.nextPage != pageId) { - ndbout << "ERROR: inconsistent in checkPageB4Remove, called from " << calledFrom << endl - << "prevPage->header.nextPage = " << prevPage->header.nextPage << endl - << "pageId = " << pageId << endl; - printoutInfoAndShutdown(page); - } - } - // Check the consistent in list. 
- if (page->header.nextPage != RNIL) { - rfpNextPageptr.i = page->header.nextPage; - ptrCheckGuard(rfpNextPageptr, cpagesize, page8); - nextPage = (LongKeyPage *) &rfpNextPageptr.p->word32[0]; - if (nextPage->header.prevPage != pageId) { - ndbout << "ERROR: inconsistent in checkPageB4Remove, called from " << calledFrom << endl - << "nextPage->header.prevPage = " << nextPage->header.prevPage << endl - << "pageId = " << pageId << endl; - printoutInfoAndShutdown(page); - } - } - jam(); - noOfOccurrence++; - } - pagePtr.i = page->header.nextPage; - }//do - while (pagePtr.i != RNIL); - }//if - }//for - - if (noOfOccurrence != 1) { - pagePtr.i = pageId; - ptrCheckGuard(pagePtr, cpagesize, page8); - page = (LongKeyPage *) &pagePtr.p->word32[0]; - ndbout << "ERROR in checkPageB4Remove, called from " << calledFrom << endl - << "Page occur " << noOfOccurrence << " times in list" << endl - << "pageId = " << pageId << endl; - printoutInfoAndShutdown(page); - } -}//Dbacc::checkPageB4Remove() - - -// --------------------------------------------------------------------------------- */ -// Printout an error message and shutdown node. 
-// --------------------------------------------------------------------------------- */ -void Dbacc::printoutInfoAndShutdown(LongKeyPage *page) { - ndbout << " page->header.pageArrayPos = " << page->header.pageArrayPos << endl - << " ((page->header.freeArea - 128) / 512) = " - << ((page->header.freeArea - 128) / 512) << endl - << " page->header.freeArea = " << page->header.freeArea << endl - << " page->header.noOfElements = " << page->header.noOfElements << endl - << " page->header.nextPage = " << page->header.nextPage << endl - << " page->header.prevPage = " << page->header.prevPage << endl - << " page->header.nextFreeIndex = " << page->header.nextFreeIndex << endl - << " page->header.insertPos = " << page->header.insertPos << endl - << " page->header.highestIndex = " << page->header.highestIndex << endl - << " page->header.pageId = " << page->header.pageId << endl; - ndbrequire(false); -}//Dbacc::printoutInfoAndShutdown() - /* --------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------- */ @@ -4419,7 +3186,7 @@ void Dbacc::printoutInfoAndShutdown(LongKeyPage *page) { /* --------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------- */ /* */ -/* MODULE: READ */ +/* MODULE: GET_ELEMENT */ /* THE FOLLOWING SUBROUTINES ARE ONLY USED BY GET_ELEMENT AND */ /* GETDIRINDEX. THIS ROUTINE IS THE SOLE INTERFACE TO GET ELEMENTS */ /* FROM THE INDEX. 
CURRENT USERS ARE ALL REQUESTS AND EXECUTE UNDO LOG */ @@ -4480,6 +3247,21 @@ void Dbacc::getdirindex(Signal* signal) ptrCheckGuard(gdiPageptr, cpagesize, page8); }//Dbacc::getdirindex() +Uint32 +Dbacc::readTablePk(Uint32 localkey1) +{ + Uint32 tableId = fragrecptr.p->myTableId; + Uint32 fragId = fragrecptr.p->myfid; + Uint32 fragPageId = localkey1 >> MAX_TUPLES_BITS; + Uint32 pageIndex = localkey1 & ((1 << MAX_TUPLES_BITS ) - 1); +#ifdef VM_TRACE + memset(ckeys, 0x1f, (fragrecptr.p->keyLength * MAX_XFRM_MULTIPLY) << 2); +#endif + int ret = c_tup->accReadPk(tableId, fragId, fragPageId, pageIndex, ckeys, true); + ndbrequire(ret > 0); + return ret; +} + /* --------------------------------------------------------------------------------- */ /* GET_ELEMENT */ /* INPUT: */ @@ -4521,7 +3303,6 @@ void Dbacc::getElement(Signal* signal) Uint32 tgeNextptrtype; register Uint32 tgeKeyptr; register Uint32 tgeRemLen; - register Uint32 tgeCompareLen; register Uint32 TelemLen = fragrecptr.p->elementLength; register Uint32* Tkeydata = (Uint32*)&signal->theData[7]; @@ -4529,20 +3310,15 @@ void Dbacc::getElement(Signal* signal) tgePageindex = tgdiPageindex; gePageptr = gdiPageptr; tgeResult = ZFALSE; - tgeCompareLen = fragrecptr.p->keyLength; - const Uint32 isAccLockReq = operationRecPtr.p->isAccLockReq; - if (isAccLockReq) { - jam(); - tgeCompareLen = 0; - } + /* + * The value seached is + * - table key for ACCKEYREQ, stored in TUP + * - local key (1 word) for ACC_LOCKREQ and UNDO, stored in ACC + */ + const bool searchLocalKey = + operationRecPtr.p->isAccLockReq || operationRecPtr.p->isUndoLogReq; - // We can handle keylength up to 8, but not more (0 means dynamic) - if (tgeCompareLen >= 9) { - ACCKEY_error(2); return; - }//if - if (TelemLen < 3) { - ACCKEY_error(3); return; - }//if + ndbrequire(TelemLen == ZELEM_HEAD_SIZE + fragrecptr.p->localkeylen); tgeNextptrtype = ZLEFT; tgeLocked = 0; @@ -4573,7 +3349,7 @@ void Dbacc::getElement(Signal* signal) } else { ACCKEY_error(6); 
return; }//if - if (tgeRemLen >= TelemLen) { + if (tgeRemLen >= ZCON_HEAD_SIZE + TelemLen) { if (tgeRemLen > ZBUF_SIZE) { ACCKEY_error(7); return; }//if @@ -4581,151 +3357,46 @@ void Dbacc::getElement(Signal* signal) // There is at least one element in this container. Check if it is the element // searched for. /* --------------------------------------------------------------------------------- */ - if (tgeCompareLen != 0) { - /* --------------------------------------------------------------------------------- */ - /* THIS PART IS USED TO SEARCH FOR KEYS WITH FIXED SIZE. THE LOOP TAKES CARE */ - /* OF SEARCHING THROUGH ALL ELEMENTS IN ONE CONTAINER. */ - /* --------------------------------------------------------------------------------- */ - do { - register Uint32 TdataIndex = 0; - register Uint32 TgeIndex = 0; + do { + tgeElementHeader = gePageptr.p->word32[tgeElementptr]; + tgeRemLen = tgeRemLen - TelemLen; + Uint32 hashValuePart; + if (ElementHeader::getLocked(tgeElementHeader)) { jam(); - tgeRemLen = tgeRemLen - TelemLen; - do { - if (gePageptr.p->word32[tgeKeyptr + TgeIndex] != Tkeydata[TdataIndex]) { - goto compare_next; - }//if - TdataIndex++; - TgeIndex += tgeForward; - } while (TdataIndex < tgeCompareLen); - /* --------------------------------------------------------------------------------- */ - /* WE HAVE FOUND THE ELEMENT. GET THE LOCK INDICATOR AND RETURN FOUND. 
*/ - /* --------------------------------------------------------------------------------- */ + geTmpOperationRecPtr.i = ElementHeader::getOpPtrI(tgeElementHeader); + ptrCheckGuard(geTmpOperationRecPtr, coprecsize, operationrec); + hashValuePart = geTmpOperationRecPtr.p->hashvaluePart; + } else { jam(); -#if __ia64 == 1 -#if __INTEL_COMPILER == 810 - // prevents SIGSEGV under icc -O1 - ndb_acc_ia64_icc810_dummy_func(); -#endif -#endif - tgeLocked = ElementHeader::getLocked(gePageptr.p->word32[tgeElementptr]); - tgeResult = ZTRUE; - TdataIndex = tgeElementptr + tgeForward; - TgeIndex = TdataIndex + tgeForward; - operationRecPtr.p->localdata[0] = gePageptr.p->word32[TdataIndex]; - operationRecPtr.p->localdata[1] = gePageptr.p->word32[TgeIndex]; - return; - /* --------------------------------------------------------------------------------- */ - /* COMPARE NEXT ELEMENT */ - /* --------------------------------------------------------------------------------- */ - compare_next: - if (tgeRemLen <= ZCON_HEAD_SIZE) { - break; - }//if - tgeKeyptr = tgeKeyptr + tgeElemStep; - tgeElementptr = tgeElementptr + tgeElemStep; - } while (1); - } else if (! isAccLockReq) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* THIS PART IS USED TO SEARCH FOR KEYS WITH VARIABLE LENGTH OR FIXED LENGTH */ - /* GREATER THAN 32 BYTES. IN THIS CASE THE KEY PART IS STORED IN A SPECIAL */ - /* LONG PAGE PART AND THE HASH INDEX CONTAINS A REFERENCE TO THERE PLUS A */ - /* PART OF THE HASH VALUE. 
*/ - /* --------------------------------------------------------------------------------- */ - do { - tgeElementHeader = gePageptr.p->word32[tgeElementptr]; - tgeRemLen = tgeRemLen - TelemLen; - Uint32 hashValuePart; - if (ElementHeader::getLocked(tgeElementHeader)) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* IN THIS CASE THE HASH VALUE PART OF THE ELEMENT HEADER IS STORED IN THE */ - /* OPERATION THAT OWNS THE LOCK. IN THIS CASE WE MIGHT AS WELL GO AHEAD AND */ - /* CHECK THE KEY IN THE LONG PAGE. */ - /* --------------------------------------------------------------------------------- */ - geTmpOperationRecPtr.i = - ElementHeader::getOpPtrI(tgeElementHeader); - ptrCheckGuard(geTmpOperationRecPtr, coprecsize, operationrec); - hashValuePart = geTmpOperationRecPtr.p->hashvaluePart; + hashValuePart = ElementHeader::getHashValuePart(tgeElementHeader); + } + if (hashValuePart == opHashValuePart) { + jam(); + Uint32 localkey1 = gePageptr.p->word32[tgeElementptr + tgeForward]; + Uint32 localkey2 = 0; + bool found; + if (! searchLocalKey) { + Uint32 len = readTablePk(localkey1); + found = (len == operationRecPtr.p->xfrmtupkeylen) && + (memcmp(Tkeydata, ckeys, len << 2) == 0); } else { jam(); - /* --------------------------------------------------------------------------------- */ - /* IN THIS CASE THE HASH VALUE PART CAN BE CHECKED TO SEE IF THE HASH VALUE */ - /* GIVES US A REASON TO CONTINUE CHECKING THE FULL KEY. */ - /* --------------------------------------------------------------------------------- */ - hashValuePart = ElementHeader::getHashValuePart(tgeElementHeader); - }//if - - if (hashValuePart == opHashValuePart) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* IF THE HASH VALUES ARE EQUAL THEN XOR-ING THEM WILL GIVE THE RESULT 0. 
*/ - /* --------------------------------------------------------------------------------- */ - /* WE HAVE FOUND A KEY WITH IDENTICAL HASH VALUE. MOST LIKELY WE HAVE FOUND THE*/ - /* ELEMENT BUT FIRST WE NEED TO PERFORM A KEY COMPARISON. */ - /* --------------------------------------------------------------------------------- */ - tslcPageIndex = gePageptr.p->word32[tgeKeyptr] & 0x3ff; - tslcPagedir = gePageptr.p->word32[tgeKeyptr] >> 10; - searchLongKey(signal, true); - if (tslcResult == ZTRUE) { - register Uint32 TlocData1, TlocData2; - jam(); - /* --------------------------------------------------------------------------------- */ - /* WE HAVE FOUND THE ELEMENT. GET THE LOCK INDICATOR AND RETURN FOUND. */ - /* --------------------------------------------------------------------------------- */ - tgeLocked = ElementHeader::getLocked(tgeElementHeader); - tgeResult = ZTRUE; - TlocData1 = tgeElementptr + tgeForward; - TlocData2 = TlocData1 + tgeForward; - operationRecPtr.p->localdata[0] = gePageptr.p->word32[TlocData1]; - operationRecPtr.p->localdata[1] = gePageptr.p->word32[TlocData2]; - return; - }//if + found = (localkey1 == Tkeydata[0]); } - /* --------------------------------------------------------------------------------- */ - /* COMPARE NEXT ELEMENT */ - /* --------------------------------------------------------------------------------- */ - if (tgeRemLen <= ZCON_HEAD_SIZE) { - break; - }//if - tgeKeyptr = tgeKeyptr + tgeElemStep; - tgeElementptr = tgeElementptr + tgeElemStep; - } while (1); - } else { - jam(); - /* --------------------------------------------------------------------------------- */ - /* Search for local key in a lock request */ - /* --------------------------------------------------------------------------------- */ - do { - tgeRemLen = tgeRemLen - TelemLen; - // position of local key word 1 - Uint32 TdataIndex = tgeElementptr + tgeForward; - // XXX assume localkeylen is 1 - if (gePageptr.p->word32[TdataIndex] == Tkeydata[0]) { + if (found) 
{ jam(); - tgeLocked = ElementHeader::getLocked(gePageptr.p->word32[tgeElementptr]); + tgeLocked = ElementHeader::getLocked(tgeElementHeader); tgeResult = ZTRUE; - // position of local key word 2 - Uint32 TgeIndex = TdataIndex + tgeForward; - operationRecPtr.p->localdata[0] = gePageptr.p->word32[TdataIndex]; - operationRecPtr.p->localdata[1] = gePageptr.p->word32[TgeIndex]; - - if (fragrecptr.p->keyLength == 0) { - // set up long key variables in operation record - tslcPageIndex = gePageptr.p->word32[tgeKeyptr] & 0x3ff; - tslcPagedir = gePageptr.p->word32[tgeKeyptr] >> 10; - // no verification since we have no key data - searchLongKey(signal, false); - } + operationRecPtr.p->localdata[0] = localkey1; + operationRecPtr.p->localdata[1] = localkey2; return; - }//if - if (tgeRemLen <= ZCON_HEAD_SIZE) { - break; - }//if - tgeElementptr = tgeElementptr + tgeElemStep; - } while (1); - }//if + } + } + if (tgeRemLen <= ZCON_HEAD_SIZE) { + break; + } + tgeElementptr = tgeElementptr + tgeElemStep; + } while (true); }//if if (tgeRemLen != ZCON_HEAD_SIZE) { ACCKEY_error(8); return; @@ -4756,71 +3427,6 @@ void Dbacc::getElement(Signal* signal) }//Dbacc::getElement() /* --------------------------------------------------------------------------------- */ -/* SEARCH_LONG_KEY */ -/* INPUT: */ -/* TSLC_PAGEDIR PAGE DIRECTORY OF LONG PAGE */ -/* TSLC_PAGE_INDEX PAGE INDEX IN LONG PAGE */ -/* GE_OPERATION_REC_PTR */ -/* OUTPUT: */ -/* TSLC_RESULT */ -/* DESCRIPTION: SEARCH FOR AN ELEMENT IN A LONG_KEY_PAGE. 
*/ -/* --------------------------------------------------------------------------------- */ -void Dbacc::searchLongKey(Signal* signal, bool verify) -{ - DirRangePtr slcOverflowrangeptr; - DirectoryarrayPtr slcOverflowDirptr; - Page8Ptr slcPageptr; - Uint32 tslcIndexValue; - Uint32 tslcStartIndex; - Uint32 tslcIndex; - Uint32 guard30; - Uint32* Tkeydata = (Uint32*)&signal->theData[7]; - - - slcOverflowrangeptr.i = fragrecptr.p->overflowdir; - ptrCheckGuard(slcOverflowrangeptr, cdirrangesize, dirRange); - arrGuard((tslcPagedir >> 8), 256); - slcOverflowDirptr.i = slcOverflowrangeptr.p->dirArray[tslcPagedir >> 8]; - ptrCheckGuard(slcOverflowDirptr, cdirarraysize, directoryarray); - - // dbgWord32(slcOverflowDirptr, (int) (tslcPagedir & 0xff), slcOverflowDirptr.p->pagep[tslcPagedir & 0xff]); - - slcPageptr.i = slcOverflowDirptr.p->pagep[tslcPagedir & 0xff]; - ptrCheckGuard(slcPageptr, cpagesize, page8); - arrGuard(ZWORDS_IN_PAGE - tslcPageIndex, 2048); - dbgWord32(slcPageptr, ZWORDS_IN_PAGE - tslcPageIndex, (int)slcPageptr.p->word32[ZWORDS_IN_PAGE - tslcPageIndex] & 0xffff); - dbgWord32(slcPageptr, ZWORDS_IN_PAGE - tslcPageIndex, slcPageptr.p->word32[ZWORDS_IN_PAGE - tslcPageIndex] >> 16); - tslcIndexValue = slcPageptr.p->word32[ZWORDS_IN_PAGE - tslcPageIndex]; - if (verify) { - if ((tslcIndexValue >> 16) != operationRecPtr.p->tupkeylen) { - jam(); - tslcResult = ZFALSE; - return; - }//if - } - tslcStartIndex = tslcIndexValue & 0xffff; - guard30 = operationRecPtr.p->tupkeylen - 1; - arrGuard(guard30, 2048); - arrGuard(guard30 + tslcStartIndex, 2048); - if (verify) { - for (tslcIndex = 0; tslcIndex <= guard30; tslcIndex++) { - dbgWord32(slcPageptr, tslcIndex + tslcStartIndex, slcPageptr.p->word32[tslcIndex + tslcStartIndex]); - if (slcPageptr.p->word32[tslcIndex + tslcStartIndex] != Tkeydata[tslcIndex]) { - jam(); - tslcResult = ZFALSE; - return; - }//if - }//for - } - jam(); - tslcResult = ZTRUE; - operationRecPtr.p->longPagePtr = slcPageptr.i; - 
operationRecPtr.p->longKeyPageIndex = tslcPageIndex; - arrGuard(tslcPageIndex, ZMAX_NO_OF_LONGKEYS_IN_PAGE); - arrGuard(slcPageptr.i, cpagesize); -}//Dbacc::searchLongKey() - -/* --------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------- */ /* */ @@ -4863,13 +3469,6 @@ void Dbacc::commitdelete(Signal* signal, bool systemRestart) EXECUTE_DIRECT(DBTUP, GSN_TUP_DEALLOCREQ, signal, 4); jamEntry(); }//if - if (fragrecptr.p->keyLength == 0) { - jam(); - tdlkLogicalPageIndex = operationRecPtr.p->longKeyPageIndex; - dlkPageptr.i = operationRecPtr.p->longPagePtr; - ptrCheckGuard(dlkPageptr, cpagesize, page8); - deleteLongKey(signal); - }//if getdirindex(signal); tlastPageindex = tgdiPageindex; lastPageptr.i = gdiPageptr.i; @@ -5428,50 +4027,6 @@ void Dbacc::checkoverfreelist(Signal* signal) }//if }//Dbacc::checkoverfreelist() -/* --------------------------------------------------------------------------------- */ -/* RELEASE_LONG_PAGE */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::releaseLongPage(Signal* signal) -{ - DirRangePtr rlpOverflowrangeptr; - DirectoryarrayPtr rlpOverflowDirptr; - Uint32 trlpTmp1; - Uint32 trlpTmp2; - Uint32 trlpTmp3; - - jam(); - seizeOverRec(signal); - sorOverflowRecPtr.p->dirindex = rlopPageptr.p->word32[ZPOS_PAGE_ID]; - sorOverflowRecPtr.p->overpage = RNIL; - priOverflowRecPtr = sorOverflowRecPtr; - putRecInFreeOverdir(signal); - trlpTmp1 = sorOverflowRecPtr.p->dirindex; - rlpOverflowrangeptr.i = fragrecptr.p->overflowdir; - trlpTmp2 = trlpTmp1 >> 8; - trlpTmp3 = trlpTmp1 & 0xff; - ptrCheckGuard(rlpOverflowrangeptr, cdirrangesize, dirRange); - arrGuard(trlpTmp2, 256); - rlpOverflowDirptr.i = rlpOverflowrangeptr.p->dirArray[trlpTmp2]; - ptrCheckGuard(rlpOverflowDirptr, cdirarraysize, 
directoryarray); - rlpOverflowDirptr.p->pagep[trlpTmp3] = RNIL; - - if (cundoLogActive != ZTRUE) { - // Remove from page array. - trfpArrayPos = rlopPageptr.p->word32[ZPOS_ARRAY_POS]; - rfpPageptr = rlopPageptr; - removeFromPageArrayList(signal); - } - - // Reset page header - iloPageptr = rlopPageptr; - tiloIndex = rlopPageptr.p->word32[ZPOS_PAGE_ID]; - initLongOverpage(signal); - - rpPageptr = rlopPageptr; - releasePage(signal); -}//Dbacc::releaseLongPage() - - /* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */ @@ -6162,16 +4717,16 @@ Uint32 Dbacc::executeNextOperation(Signal* signal) else if(operationRecPtr.p->operation == ZWRITE) { jam(); - operationRecPtr.p->operation = ZINSERT; + operationRecPtr.p->operation = ZUPDATE; if (operationRecPtr.p->prevParallelQue != RNIL) { OperationrecPtr prevOpPtr; jam(); prevOpPtr.i = operationRecPtr.p->prevParallelQue; ptrCheckGuard(prevOpPtr, coprecsize, operationrec); - if (prevOpPtr.p->operation != ZDELETE) + if (prevOpPtr.p->operation == ZDELETE) { jam(); - operationRecPtr.p->operation = ZUPDATE; + operationRecPtr.p->operation = ZINSERT; } } } @@ -6761,8 +5316,7 @@ void Dbacc::execDEBUG_SIG(Signal* signal) jamEntry(); expPageptr.i = signal->theData[0]; - progError(__LINE__, - ERR_SR_UNDOLOG); + progError(__LINE__, NDBD_EXIT_SR_UNDOLOG); return; }//Dbacc::execDEBUG_SIG() @@ -6779,14 +5333,8 @@ void Dbacc::expandcontainer(Signal* signal) Uint32 texcHashvalue; Uint32 texcTmp; Uint32 texcIndex; - Uint32 texpKeyLen; Uint32 guard20; - texpKeyLen = fragrecptr.p->keyLength; - if (texpKeyLen == 0) { - jam(); - texpKeyLen = ZACTIVE_LONG_KEY_LEN; - }//if cexcPrevpageptr = RNIL; cexcPrevconptr = 0; cexcForward = ZTRUE; @@ -6873,18 +5421,10 @@ void Dbacc::expandcontainer(Signal* signal) clocalkey[texcIndex] = excPageptr.p->word32[texcTmp]; texcTmp = 
texcTmp + cexcForward; }//for - guard20 = texpKeyLen - 1; - for (texcIndex = 0; texcIndex <= guard20; texcIndex++) { - arrGuard(texcIndex, 2048); - arrGuard(texcTmp, 2048); - ckeys[texcIndex] = excPageptr.p->word32[texcTmp]; - texcTmp = texcTmp + cexcForward; - }//for tidrPageindex = fragrecptr.p->expReceiveIndex; idrPageptr.i = fragrecptr.p->expReceivePageptr; ptrCheckGuard(idrPageptr, cpagesize, page8); tidrForward = fragrecptr.p->expReceiveForward; - tidrKeyLen = texpKeyLen; insertElement(signal); fragrecptr.p->expReceiveIndex = tidrPageindex; fragrecptr.p->expReceivePageptr = idrPageptr.i; @@ -6966,17 +5506,10 @@ void Dbacc::expandcontainer(Signal* signal) clocalkey[texcIndex] = lastPageptr.p->word32[texcTmp]; texcTmp = texcTmp + tlastForward; }//for - for (texcIndex = 0; texcIndex < texpKeyLen; texcIndex++) { - arrGuard(texcIndex, 2048); - arrGuard(texcTmp, 2048); - ckeys[texcIndex] = lastPageptr.p->word32[texcTmp]; - texcTmp = texcTmp + tlastForward; - }//for tidrPageindex = fragrecptr.p->expReceiveIndex; idrPageptr.i = fragrecptr.p->expReceivePageptr; ptrCheckGuard(idrPageptr, cpagesize, page8); tidrForward = fragrecptr.p->expReceiveForward; - tidrKeyLen = texpKeyLen; insertElement(signal); fragrecptr.p->expReceiveIndex = tidrPageindex; fragrecptr.p->expReceivePageptr = idrPageptr.i; @@ -7096,7 +5629,7 @@ Uint32 Dbacc::checkScanShrink(Signal* signal) //------------------------------------------------------------- } else { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); return TreturnCode; }//if }//if @@ -7457,18 +5990,12 @@ void Dbacc::shrinkcontainer(Signal* signal) Uint32 tshrElementptr; Uint32 tshrRemLen; Uint32 tshrInc; - Uint32 tshrKeyLen; Uint32 tshrTmp; Uint32 tshrIndex; Uint32 guard21; tshrRemLen = cexcContainerlen - ZCON_HEAD_SIZE; - tshrKeyLen = fragrecptr.p->keyLength; - if (tshrKeyLen == 0) { - jam(); - tshrKeyLen = ZACTIVE_LONG_KEY_LEN; - }//if - tshrInc = (ZELEM_HEAD_SIZE + tshrKeyLen) + fragrecptr.p->localkeylen; + 
tshrInc = fragrecptr.p->elementLength; if (cexcForward == ZTRUE) { jam(); tshrElementptr = cexcContainerptr + ZCON_HEAD_SIZE; @@ -7517,18 +6044,10 @@ void Dbacc::shrinkcontainer(Signal* signal) clocalkey[tshrIndex] = excPageptr.p->word32[tshrTmp]; tshrTmp = tshrTmp + cexcForward; }//for - guard21 = tshrKeyLen - 1; - for (tshrIndex = 0; tshrIndex <= guard21; tshrIndex++) { - arrGuard(tshrIndex, 2048); - arrGuard(tshrTmp, 2048); - ckeys[tshrIndex] = excPageptr.p->word32[tshrTmp]; - tshrTmp = tshrTmp + cexcForward; - }//for tidrPageindex = fragrecptr.p->expReceiveIndex; idrPageptr.i = fragrecptr.p->expReceivePageptr; ptrCheckGuard(idrPageptr, cpagesize, page8); tidrForward = fragrecptr.p->expReceiveForward; - tidrKeyLen = tshrKeyLen; insertElement(signal); /* --------------------------------------------------------------------------------- */ /* TAKE CARE OF RESULT FROM INSERT_ELEMENT. */ @@ -7538,7 +6057,7 @@ void Dbacc::shrinkcontainer(Signal* signal) fragrecptr.p->expReceiveForward = tidrForward; if (tshrRemLen < tshrInc) { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); }//if tshrRemLen = tshrRemLen - tshrInc; if (tshrRemLen != 0) { @@ -7573,7 +6092,7 @@ void Dbacc::nextcontainerinfoExp(Signal* signal) cexcForward = cminusOne; } else { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); cexcForward = 0; /* DUMMY FOR COMPILER */ }//if if (tnciNextSamePage == ZFALSE) { @@ -8072,7 +6591,7 @@ void Dbacc::execACC_SAVE_PAGES(Signal* signal) ptrCheckGuard(lcpConnectptr, clcpConnectsize, lcpConnectrec); if (lcpConnectptr.p->lcpstate != LCP_ACTIVE) { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); return; }//if if (ERROR_INSERTED(3000)) { @@ -8256,17 +6775,9 @@ void Dbacc::saveOverPagesLab(Signal* signal) jam(); ropPageptr = sopPageptr; releaseOverpage(signal); - } else if (((sopPageptr.p->word32[ZPOS_PAGE_TYPE] >> ZPOS_PAGE_TYPE_BIT) & 3) == - ZLONG_PAGE_TYPE) { - 
//---------------------------------------------------------------------- - // The long key page is empty, release it. - //---------------------------------------------------------------------- - jam(); - rlopPageptr = sopPageptr; - releaseLongPage(signal); } else { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); } }//if } @@ -8406,7 +6917,7 @@ void Dbacc::checkSyncUndoPagesLab(Signal* signal) break; default: jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); return; break; }//switch @@ -8655,66 +7166,63 @@ void Dbacc::lcpCopyPage(Signal* signal) }//for }//for tlcnChecksum = Tchs; - if (((lcnCopyPageptr.p->word32[ZPOS_PAGE_TYPE] >> ZPOS_PAGE_TYPE_BIT) & 3) != ZLONG_PAGE_TYPE) { - jam(); - if (((lcnCopyPageptr.p->word32[ZPOS_PAGE_TYPE] >> ZPOS_PAGE_TYPE_BIT) & 3) == ZNORMAL_PAGE_TYPE) { - jam(); - /*-----------------------------------------------------------------*/ - /* TAKE CARE OF ALL 64 BUFFERS ADDRESSED BY ALGORITHM IN */ - /* FIRST PAGE. IF THEY ARE EMPTY THEY STILL HAVE A CONTAINER */ - /* HEADER OF 2 WORDS. */ - /*-----------------------------------------------------------------*/ - tlcnConIndex = ZHEAD_SIZE; - tlupForward = 1; - for (tlcnIndex = 0; tlcnIndex <= ZNO_CONTAINERS - 1; tlcnIndex++) { - tlupIndex = tlcnConIndex; - tlupElemIndex = tlcnConIndex + ZCON_HEAD_SIZE; - lcpUpdatePage(signal); - tlcnConIndex = tlcnConIndex + ZBUF_SIZE; - }//for - }//if + if (((lcnCopyPageptr.p->word32[ZPOS_PAGE_TYPE] >> ZPOS_PAGE_TYPE_BIT) & 3) == ZNORMAL_PAGE_TYPE) { + jam(); /*-----------------------------------------------------------------*/ - /* TAKE CARE OF ALL USED BUFFERS ON THE LEFT SIDE. */ + /* TAKE CARE OF ALL 64 BUFFERS ADDRESSED BY ALGORITHM IN */ + /* FIRST PAGE. IF THEY ARE EMPTY THEY STILL HAVE A CONTAINER */ + /* HEADER OF 2 WORDS. 
*/ /*-----------------------------------------------------------------*/ - tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 23) & 0x7f; - while (tlcnNextContainer < ZEMPTYLIST) { - tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); - tlcnConIndex = tlcnConIndex + ZHEAD_SIZE; + tlcnConIndex = ZHEAD_SIZE; + tlupForward = 1; + for (tlcnIndex = 0; tlcnIndex <= ZNO_CONTAINERS - 1; tlcnIndex++) { tlupIndex = tlcnConIndex; tlupElemIndex = tlcnConIndex + ZCON_HEAD_SIZE; - tlupForward = 1; - lcpUpdatePage(signal); - tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; - }//while - if (tlcnNextContainer == ZEMPTYLIST) { - jam(); - /*empty*/; - } else { - jam(); - sendSystemerror(signal); - return; - }//if - /*-----------------------------------------------------------------*/ - /* TAKE CARE OF ALL USED BUFFERS ON THE RIGHT SIDE. */ - /*-----------------------------------------------------------------*/ - tlupForward = cminusOne; - tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 16) & 0x7f; - while (tlcnNextContainer < ZEMPTYLIST) { - tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); - tlcnConIndex = tlcnConIndex + ((ZHEAD_SIZE + ZBUF_SIZE) - ZCON_HEAD_SIZE); - tlupIndex = tlcnConIndex; - tlupElemIndex = tlcnConIndex - 1; lcpUpdatePage(signal); - tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; - }//while - if (tlcnNextContainer == ZEMPTYLIST) { - jam(); - /*empty*/; - } else { - jam(); - sendSystemerror(signal); - return; - }//if + tlcnConIndex = tlcnConIndex + ZBUF_SIZE; + }//for + }//if + /*-----------------------------------------------------------------*/ + /* TAKE CARE OF ALL USED BUFFERS ON THE LEFT SIDE. 
*/ + /*-----------------------------------------------------------------*/ + tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 23) & 0x7f; + while (tlcnNextContainer < ZEMPTYLIST) { + tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); + tlcnConIndex = tlcnConIndex + ZHEAD_SIZE; + tlupIndex = tlcnConIndex; + tlupElemIndex = tlcnConIndex + ZCON_HEAD_SIZE; + tlupForward = 1; + lcpUpdatePage(signal); + tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; + }//while + if (tlcnNextContainer == ZEMPTYLIST) { + jam(); + /*empty*/; + } else { + jam(); + sendSystemerror(signal, __LINE__); + return; + }//if + /*-----------------------------------------------------------------*/ + /* TAKE CARE OF ALL USED BUFFERS ON THE RIGHT SIDE. */ + /*-----------------------------------------------------------------*/ + tlupForward = cminusOne; + tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 16) & 0x7f; + while (tlcnNextContainer < ZEMPTYLIST) { + tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); + tlcnConIndex = tlcnConIndex + ((ZHEAD_SIZE + ZBUF_SIZE) - ZCON_HEAD_SIZE); + tlupIndex = tlcnConIndex; + tlupElemIndex = tlcnConIndex - 1; + lcpUpdatePage(signal); + tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; + }//while + if (tlcnNextContainer == ZEMPTYLIST) { + jam(); + /*empty*/; + } else { + jam(); + sendSystemerror(signal, __LINE__); + return; }//if lcnCopyPageptr.p->word32[ZPOS_CHECKSUM] = tlcnChecksum; }//Dbacc::lcpCopyPage() @@ -8760,7 +7268,7 @@ void Dbacc::lcpUpdatePage(Signal* signal) }//while if (tlupConLen < ZCON_HEAD_SIZE) { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); }//if }//Dbacc::lcpUpdatePage() @@ -8775,78 +7283,75 @@ void Dbacc::srCheckPage(Signal* signal) Uint32 tlcnIndex; lupPageptr.p = lcnCopyPageptr.p; - if (((lcnCopyPageptr.p->word32[ZPOS_PAGE_TYPE] >> ZPOS_PAGE_TYPE_BIT) & 3) != 
ZLONG_PAGE_TYPE) { - jam(); - if (((lcnCopyPageptr.p->word32[ZPOS_PAGE_TYPE] >> ZPOS_PAGE_TYPE_BIT) & 3) == ZNORMAL_PAGE_TYPE) { - jam(); - /*-----------------------------------------------------------------*/ - /* TAKE CARE OF ALL 64 BUFFERS ADDRESSED BY ALGORITHM IN */ - /* FIRST PAGE. IF THEY ARE EMPTY THEY STILL HAVE A CONTAINER */ - /* HEADER OF 2 WORDS. */ - /*-----------------------------------------------------------------*/ - tlcnConIndex = ZHEAD_SIZE; - tlupForward = 1; - for (tlcnIndex = 0; tlcnIndex <= ZNO_CONTAINERS - 1; tlcnIndex++) { - tlupIndex = tlcnConIndex; - tlupElemIndex = tlcnConIndex + ZCON_HEAD_SIZE; - srCheckContainer(signal); - if (tresult != 0) { - jam(); - return; - }//if - tlcnConIndex = tlcnConIndex + ZBUF_SIZE; - }//for - }//if + if (((lcnCopyPageptr.p->word32[ZPOS_PAGE_TYPE] >> ZPOS_PAGE_TYPE_BIT) & 3) == ZNORMAL_PAGE_TYPE) { + jam(); /*-----------------------------------------------------------------*/ - /* TAKE CARE OF ALL USED BUFFERS ON THE LEFT SIDE. */ + /* TAKE CARE OF ALL 64 BUFFERS ADDRESSED BY ALGORITHM IN */ + /* FIRST PAGE. IF THEY ARE EMPTY THEY STILL HAVE A CONTAINER */ + /* HEADER OF 2 WORDS. 
*/ /*-----------------------------------------------------------------*/ - tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 23) & 0x7f; - while (tlcnNextContainer < ZEMPTYLIST) { - tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); - tlcnConIndex = tlcnConIndex + ZHEAD_SIZE; + tlcnConIndex = ZHEAD_SIZE; + tlupForward = 1; + for (tlcnIndex = 0; tlcnIndex <= ZNO_CONTAINERS - 1; tlcnIndex++) { tlupIndex = tlcnConIndex; tlupElemIndex = tlcnConIndex + ZCON_HEAD_SIZE; - tlupForward = 1; srCheckContainer(signal); if (tresult != 0) { jam(); return; }//if - tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; - }//while - if (tlcnNextContainer == ZEMPTYLIST) { - jam(); - /*empty*/; - } else { + tlcnConIndex = tlcnConIndex + ZBUF_SIZE; + }//for + }//if + /*-----------------------------------------------------------------*/ + /* TAKE CARE OF ALL USED BUFFERS ON THE LEFT SIDE. */ + /*-----------------------------------------------------------------*/ + tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 23) & 0x7f; + while (tlcnNextContainer < ZEMPTYLIST) { + tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); + tlcnConIndex = tlcnConIndex + ZHEAD_SIZE; + tlupIndex = tlcnConIndex; + tlupElemIndex = tlcnConIndex + ZCON_HEAD_SIZE; + tlupForward = 1; + srCheckContainer(signal); + if (tresult != 0) { jam(); - tresult = 4; return; }//if - /*-----------------------------------------------------------------*/ - /* TAKE CARE OF ALL USED BUFFERS ON THE RIGHT SIDE. 
*/ - /*-----------------------------------------------------------------*/ - tlupForward = cminusOne; - tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 16) & 0x7f; - while (tlcnNextContainer < ZEMPTYLIST) { - tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); - tlcnConIndex = tlcnConIndex + ((ZHEAD_SIZE + ZBUF_SIZE) - ZCON_HEAD_SIZE); - tlupIndex = tlcnConIndex; - tlupElemIndex = tlcnConIndex - 1; - srCheckContainer(signal); - if (tresult != 0) { - jam(); - return; - }//if - tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; - }//while - if (tlcnNextContainer == ZEMPTYLIST) { - jam(); - /*empty*/; - } else { + tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; + }//while + if (tlcnNextContainer == ZEMPTYLIST) { + jam(); + /*empty*/; + } else { + jam(); + tresult = 4; + return; + }//if + /*-----------------------------------------------------------------*/ + /* TAKE CARE OF ALL USED BUFFERS ON THE RIGHT SIDE. 
*/ + /*-----------------------------------------------------------------*/ + tlupForward = cminusOne; + tlcnNextContainer = (lcnCopyPageptr.p->word32[ZPOS_EMPTY_LIST] >> 16) & 0x7f; + while (tlcnNextContainer < ZEMPTYLIST) { + tlcnConIndex = (tlcnNextContainer << ZSHIFT_PLUS) - (tlcnNextContainer << ZSHIFT_MINUS); + tlcnConIndex = tlcnConIndex + ((ZHEAD_SIZE + ZBUF_SIZE) - ZCON_HEAD_SIZE); + tlupIndex = tlcnConIndex; + tlupElemIndex = tlcnConIndex - 1; + srCheckContainer(signal); + if (tresult != 0) { jam(); - tresult = 4; return; }//if + tlcnNextContainer = (lcnCopyPageptr.p->word32[tlcnConIndex] >> 11) & 0x7f; + }//while + if (tlcnNextContainer == ZEMPTYLIST) { + jam(); + /*empty*/; + } else { + jam(); + tresult = 4; + return; }//if }//Dbacc::srCheckPage() @@ -9030,50 +7535,14 @@ void Dbacc::undoWritingProcess(Signal* signal) writeUndoDataInfo(signal); checkUndoPages(signal); }//if - } else if (tpageType != ZLONG_PAGE_TYPE) { + } else { jam(); /* --------------------------------------------------------------------------- */ /* ONLY PAGE INFO AND OVERFLOW PAGE INFO CAN BE LOGGED BY THIS ROUTINE. A */ /* SERIOUS ERROR. */ /* --------------------------------------------------------------------------- */ - sendSystemerror(signal); - } else { - /* --------------------------------------------------------------------------------- */ - /* THIS LOG RECORD IS GENERATED ON A LONG KEY PAGE. THESE PAGES USE LOGICAL */ - /* LOGGING. */ - /* --------------------------------------------------------------------------------- */ - if (tactivePageDir >= fragrecptr.p->lcpMaxOverDirIndex) { - jam(); - /* --------------------------------------------------------------------------------- */ - /* OBVIOUSLY THE FRAGMENT HAS EXPANDED THE NUMBER OF OVERFLOW PAGES SINCE THE */ - /* START OF THE LOCAL CHECKPOINT. WE NEED NOT LOG ANY UPDATES OF PAGES THAT DID*/ - /* NOT EXIST AT START OF LCP. 
*/ - /* --------------------------------------------------------------------------------- */ - /*empty*/; - } else { - jam(); - /* --------------------------------------------------------------------------------- */ - /* LOGICAL LOGGING OF LONG KEY PAGES CAN EITHER BE UNDO OF AN INSERT OR UNDO */ - /* OF A DELETE KEY. UNDO OF DELETE NEEDS TO LOG THE KEY TO BE REINSERTED WHILE */ - /* UNDO OF INSERT ONLY NEEDS TO LOG THE INDEX TO BE DELETED. */ - /* --------------------------------------------------------------------------------- */ - undopageptr.i = (cundoposition >> ZUNDOPAGEINDEXBITS) & (cundopagesize - 1); - ptrAss(undopageptr, undopage); - theadundoindex = cundoposition & ZUNDOPAGEINDEX_MASK; - tundoindex = theadundoindex + ZUNDOHEADSIZE; - if (cundoinfolength == 0) { - jam(); - writeUndoHeader(signal, tactivePageDir, UndoHeader::ZUNDO_INSERT_LONG_KEY); - } else { - jam(); - writeUndoHeader(signal, tactivePageDir, UndoHeader::ZUNDO_DELETE_LONG_KEY); - arrGuard(ZWORDS_IN_PAGE - cundoElemIndex, 2048); - tundoElemIndex = datapageptr.p->word32[ZWORDS_IN_PAGE - cundoElemIndex] & 0xffff; - writeUndoDataInfo(signal); - }//if - checkUndoPages(signal); - }//if - }//if + sendSystemerror(signal, __LINE__); + } } else { if (fragrecptr.p->fragState == LCP_SEND_OVER_PAGES) { jam(); @@ -9108,46 +7577,7 @@ void Dbacc::undoWritingProcess(Signal* signal) checkUndoPages(signal); }//if }//if - } else if (tpageType == ZLONG_PAGE_TYPE) { - if (tactivePageDir < fragrecptr.p->lcpDirIndex) { - jam(); - // ------------------------------------------------------------- - // THIS PAGE HAS ALREADY BEEN WRITTEN IN THE LOCAL CHECKPOINT. - // ------------------------------------------------------------- - } else { - if (tactivePageDir >= fragrecptr.p->lcpMaxOverDirIndex) { - jam(); - // ------------------------------------------------------------- - // OBVIOUSLY THE FRAGMENT HAS EXPANDED THE NUMBER OF OVERFLOW - // PAGES SINCE THE START OF THE LOCAL CHECKPOINT. 
WE NEED NOT - // LOG ANY UPDATES OF PAGES THAT DID NOT EXIST AT START OF LCP. - // ------------------------------------------------------------- - } else { - jam(); - // ------------------------------------------------------------- - // LOGICAL LOGGING OF LONG KEY PAGES CAN EITHER BE UNDO OF AN - // INSERT OR UNDO OF A DELETE KEY. UNDO OF DELETE NEEDS TO LOG - // THE KEY TO BE REINSERTED WHILE UNDO OF INSERT ONLY NEEDS TO - // LOG THE INDEX TO BE DELETED. - // ------------------------------------------------------------- - undopageptr.i = (cundoposition >> ZUNDOPAGEINDEXBITS) & (cundopagesize - 1); - ptrAss(undopageptr, undopage); - theadundoindex = cundoposition & ZUNDOPAGEINDEX_MASK; - tundoindex = theadundoindex + ZUNDOHEADSIZE; - if (cundoinfolength == 0) { - jam(); - writeUndoHeader(signal, tactivePageDir, UndoHeader::ZUNDO_INSERT_LONG_KEY); - } else { - jam(); - writeUndoHeader(signal, tactivePageDir, UndoHeader::ZUNDO_DELETE_LONG_KEY); - arrGuard(ZWORDS_IN_PAGE - cundoElemIndex, 2048); - tundoElemIndex = datapageptr.p->word32[ZWORDS_IN_PAGE - cundoElemIndex] & 0xffff; - writeUndoDataInfo(signal); - }//if - checkUndoPages(signal); - }//if - }//if - }//if + } }//if }//if }//Dbacc::undoWritingProcess() @@ -9200,8 +7630,9 @@ void Dbacc::writeUndoHeader(Signal* signal, (UndoHeader *) &undopageptr.p->undoword[theadundoindex]; undoHeaderPtr->tableId = rootfragrecptr.p->mytabptr; - undoHeaderPtr->rootFragId = rootfragrecptr.p->fragmentid[0]; + undoHeaderPtr->rootFragId = rootfragrecptr.p->fragmentid[0] >> 1; undoHeaderPtr->localFragId = fragrecptr.p->myfid; + ndbrequire((undoHeaderPtr->localFragId >> 1) == undoHeaderPtr->rootFragId); Uint32 Ttmp = cundoinfolength; Ttmp = (Ttmp << 4) + pageType; Ttmp = Ttmp << 14; @@ -9231,52 +7662,16 @@ void Dbacc::writeUndoOpInfo(Signal* signal) undopageptr.p->undoword[tundoindex + 1] = operationRecPtr.p->hashValue; undopageptr.p->undoword[tundoindex + 2] = operationRecPtr.p->tupkeylen; tundoindex = tundoindex + 3; - if 
(fragrecptr.p->keyLength != 0) { - // Fixed size keys - jam(); - locPageptr.i = operationRecPtr.p->elementPage; - ptrCheckGuard(locPageptr, cpagesize, page8); - Uint32 Tforward = operationRecPtr.p->elementIsforward; - Uint32 TelemPtr = operationRecPtr.p->elementPointer; - TelemPtr += Tforward; - TelemPtr += Tforward; - //--------------------------------------------------------------------------------- - // Now the pointer is at the start of the key part of the element. Now copy from there - // to the UNDO log. - //--------------------------------------------------------------------------------- - Uint32 keyLen = operationRecPtr.p->tupkeylen; - ndbrequire(keyLen <= 8); - arrGuard(tundoindex+keyLen, 8192); - for (Uint32 twuoiIndex = 0; twuoiIndex < keyLen; twuoiIndex++) { - jam(); - arrGuard(TelemPtr, 2048); - undopageptr.p->undoword[tundoindex] = locPageptr.p->word32[TelemPtr]; - tundoindex++; - TelemPtr += Tforward; - }//for - cundoinfolength = ZOP_HEAD_INFO_LN + operationRecPtr.p->tupkeylen; - } else { - // Long keys - jam(); - - arrGuard(operationRecPtr.p->longKeyPageIndex, ZMAX_NO_OF_LONGKEYS_IN_PAGE); - locPageptr.i = operationRecPtr.p->longPagePtr; - ptrCheckGuard(locPageptr, cpagesize, page8); - - Uint32 indexValue = - locPageptr.p->word32[ZWORDS_IN_PAGE - operationRecPtr.p->longKeyPageIndex]; - Uint32 keyLen = indexValue >> 16; - Uint32 physPageIndex = indexValue & 0xffff; - ndbrequire(keyLen == operationRecPtr.p->tupkeylen); - - arrGuard(tundoindex+keyLen, 8192); - arrGuard(physPageIndex+keyLen, 2048); - for (Uint32 i = 0; i < keyLen; i++){ - undopageptr.p->undoword[tundoindex + i] = locPageptr.p->word32[physPageIndex+i]; - } - tundoindex = tundoindex + keyLen; - cundoinfolength = ZOP_HEAD_INFO_LN + keyLen; - }//if + // log localkey1 + locPageptr.i = operationRecPtr.p->elementPage; + ptrCheckGuard(locPageptr, cpagesize, page8); + Uint32 Tforward = operationRecPtr.p->elementIsforward; + Uint32 TelemPtr = operationRecPtr.p->elementPointer; + TelemPtr += 
Tforward; // ZELEM_HEAD_SIZE + arrGuard(tundoindex+1, 8192); + undopageptr.p->undoword[tundoindex] = locPageptr.p->word32[TelemPtr]; + tundoindex++; + cundoinfolength = ZOP_HEAD_INFO_LN + 1; }//Dbacc::writeUndoOpInfo() /* --------------------------------------------------------------------------------- */ @@ -9460,7 +7855,7 @@ void Dbacc::initFragAdd(Signal* signal, }//if regFragPtr.p->fragState = ACTIVEFRAG; // NOTE: next line must match calculation in Dblqh::execLQHFRAGREQ - regFragPtr.p->myfid = (rootFragIndex << (lhFragBits - 1)) | req->fragId; + regFragPtr.p->myfid = (req->fragId << 1) | rootFragIndex; regFragPtr.p->myroot = rootIndex; regFragPtr.p->myTableId = req->tableId; ndbrequire(req->kValue == 6); @@ -9488,17 +7883,16 @@ void Dbacc::initFragAdd(Signal* signal, regFragPtr.p->dirsize = 1; regFragPtr.p->loadingFlag = ZFALSE; regFragPtr.p->keyLength = req->keyLength; - if (req->keyLength == 0) { - jam(); - regFragPtr.p->elementLength = (1 + ZELEM_HEAD_SIZE) + regFragPtr.p->localkeylen; - } else { - jam(); - regFragPtr.p->elementLength = (ZELEM_HEAD_SIZE + regFragPtr.p->localkeylen) + regFragPtr.p->keyLength; - }//if + ndbrequire(req->keyLength != 0); + regFragPtr.p->elementLength = ZELEM_HEAD_SIZE + regFragPtr.p->localkeylen; Uint32 Tmp1 = (regFragPtr.p->maxp + 1) + regFragPtr.p->p; Uint32 Tmp2 = regFragPtr.p->maxloadfactor - regFragPtr.p->minloadfactor; Tmp2 = Tmp1 * Tmp2; regFragPtr.p->slackCheck = Tmp2; + + Uint32 hasCharAttr = g_key_descriptor_pool.getPtr(req->tableId)->hasCharAttr; + regFragPtr.p->hasCharAttr = hasCharAttr; + }//Dbacc::initFragAdd() void Dbacc::initFragGeneral(FragmentrecPtr regFragPtr) @@ -9518,6 +7912,7 @@ void Dbacc::initFragGeneral(FragmentrecPtr regFragPtr) regFragPtr.p->activeDataPage = 0; regFragPtr.p->createLcp = ZFALSE; regFragPtr.p->stopQueOp = ZFALSE; + regFragPtr.p->hasCharAttr = ZFALSE; regFragPtr.p->nextAllocPage = 0; regFragPtr.p->nrWaitWriteUndoExit = 0; regFragPtr.p->lastUndoIsStored = ZFALSE; @@ -9776,7 +8171,7 @@ 
void Dbacc::srReadPagesLab(Signal* signal) for (Uint32 i = 0; i < limitLoop; i++) { jam(); seizePage(signal); - ndbrequire(tresult <= ZLIMIT_OF_ERROR); + ndbrequireErr(tresult <= ZLIMIT_OF_ERROR, NDBD_EXIT_SR_OUT_OF_INDEXMEMORY); fragrecptr.p->datapages[i] = spPageptr.i; signal->theData[i + 6] = spPageptr.i; }//for @@ -10103,7 +8498,7 @@ void Dbacc::startUndoLab(Signal* signal) }//for // Send report of how many undo log records where executed - signal->theData[0] = EventReport::UNDORecordsExecuted; + signal->theData[0] = NDB_LE_UNDORecordsExecuted; signal->theData[1] = DBACC; // From block signal->theData[2] = 0; // Total records executed for (int i = 0; i < 10; i++){ @@ -10149,7 +8544,7 @@ void Dbacc::startActiveUndo(Signal* signal) /*---------------------------------------------------------------------------*/ if (cfsFirstfreeconnect == RNIL) { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); }//if seizeFsConnectRec(signal); cactiveSrFsPtr = fsConnectptr.i; @@ -10296,6 +8691,7 @@ void Dbacc::srDoUndoLab(Signal* signal) // ROOT FRAGMENT ID tfid = undoHeaderPtr->rootFragId; + ndbrequire((undoHeaderPtr->localFragId >> 1) == undoHeaderPtr->rootFragId); if (!getrootfragmentrec(signal, rootfragrecptr, tfid)) { jam(); /*---------------------------------------------------------------------*/ @@ -10305,7 +8701,10 @@ void Dbacc::srDoUndoLab(Signal* signal) creadyUndoaddress = cprevUndoaddress; // PREVIOUS UNDO LOG RECORD FOR ALL FRAGMENTS cprevUndoaddress = undoHeaderPtr->prevUndoAddress; - undoNext2Lab(signal); + undoNext2Lab(signal); +#ifdef VM_TRACE + ndbout_c("ignoring root fid %d", (int)tfid); +#endif return; }//if /*-----------------------------------------------------------------------*/ @@ -10322,7 +8721,8 @@ void Dbacc::srDoUndoLab(Signal* signal) ptrCheckGuard(fragrecptr, cfragmentsize, fragmentrec); } else { jam(); - progError(__LINE__, 0, "Invalid local fragment id in undo log"); + progError(__LINE__, NDBD_EXIT_SR_UNDOLOG, + "Invalid local 
fragment id in undo log"); return; }//if }//if @@ -10415,103 +8815,6 @@ void Dbacc::srDoUndoLab(Signal* signal) break; } - case UndoHeader::ZUNDO_INSERT_LONG_KEY:{ - jam(); - /*---------------------------------------------------------------------*/ - /* WE WILL UNDO AN INSERT OF A LONG KEY. THIS IS PERFORMED BY DELETING */ - /* THE LONG KEY. */ - /*---------------------------------------------------------------------*/ - souDirRangePtr.i = fragrecptr.p->overflowdir; - tmpP2 = tmpP >> 8; - tmpP = tmpP & 0xff; - arrGuard(tmpP2, 256); - ptrCheckGuard(souDirRangePtr, cdirrangesize, dirRange); - souDirptr.i = souDirRangePtr.p->dirArray[tmpP2]; - ptrCheckGuard(souDirptr, cdirarraysize, directoryarray); - dlkPageptr.i = souDirptr.p->pagep[tmpP]; - ptrCheckGuard(dlkPageptr, cpagesize, page8); - tdlkLogicalPageIndex = tundoPageindex; - deleteLongKey(signal); - break; - } - - case UndoHeader::ZUNDO_DELETE_LONG_KEY: { - jam(); - /*----------------------------------------------------------------------*/ - /* WE WILL UNDO DELETE OF A LONG KEY. THIS IS PERFORMED BY INSERTING */ - /* IT AGAIN. */ - /*----------------------------------------------------------------------*/ - souDirRangePtr.i = fragrecptr.p->overflowdir; - taslpDirIndex = tmpP; - tmpP2 = tmpP >> 8; - tmpP = tmpP & 0xff; - ptrCheckGuard(souDirRangePtr, cdirrangesize, dirRange); - arrGuard(tmpP2, 256); - souDirptr.i = souDirRangePtr.p->dirArray[tmpP2]; - - if(souDirptr.i == RNIL) { - //---------------------------------------------------------------- - // Allocate a directory. 
- //---------------------------------------------------------------- - jam(); - seizeDirectory(signal); - if (tresult > ZLIMIT_OF_ERROR) { - jam(); - sendSystemerror(signal); - return; - } - souDirRangePtr.p->dirArray[tmpP2] = sdDirptr.i; - souDirptr.i = souDirRangePtr.p->dirArray[tmpP2]; - } - - ptrCheckGuard(souDirptr, cdirarraysize, directoryarray); - slkapPageptr.i = souDirptr.p->pagep[tmpP]; - - if(slkapPageptr.i == RNIL) { - //---------------------------------------------------------------- - // The delete operation was probably the last on the page and the - // page was released and not written down to disk. We need to - // allocate a page and put it in the same dirindex as it was in - // before it was released. - // This is because an eventual UNDO_INSERT on the same key in the - // same LCP must be able to find the key and it has only the - // dirindex to go on, the key itself is not saved on disk in a - // UNDO_INSERT. - //---------------------------------------------------------------- - jam(); - allocSpecificLongOverflowPage(signal); - slkapPageptr.i = aslpPageptr.i; - } - - ptrCheckGuard(slkapPageptr, cpagesize, page8); - seizePage(signal); - ndbrequire(tresult <= ZLIMIT_OF_ERROR); - - slkapCopyPageptr = spPageptr; - ndbrequire(cundoinfolength <= 2048); - - for (Uint32 tmp = 0; tmp < cundoinfolength; tmp++) { - dbgWord32(slkapCopyPageptr, tmp, undopageptr.p->undoword[tmpindex]); - slkapCopyPageptr.p->word32[tmp] = undopageptr.p->undoword[tmpindex]; - tmpindex = tmpindex + 1; - }//for - jam(); - //---------------------------------------------------------------- - // We must store the key at the same place it was deleted from. - // This is because an eventual UNDO_INSERT on the same key in the - // same LCP must be able to find the key and it has only the index - // information stored on disk to go on, the key itself is not - // saved on disk in an UNDO_INSERT. 
- //---------------------------------------------------------------- - tslkapKeyLen = cundoinfolength; - tslkapPageIndex = tundoPageindex; - storeLongKeysAtPos(signal); - - rpPageptr = slkapCopyPageptr; - releasePage(signal); - break; - } - case UndoHeader::ZOP_INFO: { jam(); /*---------------------------------------------------------------------*/ @@ -10550,6 +8853,7 @@ void Dbacc::srDoUndoLab(Signal* signal) operationRecPtr.p->longKeyPageIndex = RNIL; operationRecPtr.p->scanRecPtr = RNIL; operationRecPtr.p->isAccLockReq = ZFALSE; + operationRecPtr.p->isUndoLogReq = ZTRUE; // Read operation values from undo page operationRecPtr.p->operation = undopageptr.p->undoword[tmpindex]; @@ -10559,17 +8863,15 @@ void Dbacc::srDoUndoLab(Signal* signal) const Uint32 tkeylen = undopageptr.p->undoword[tmpindex]; tmpindex++; operationRecPtr.p->tupkeylen = tkeylen; + operationRecPtr.p->xfrmtupkeylen = 0; // not used operationRecPtr.p->fragptr = fragrecptr.i; - ndbrequire((fragrecptr.p->keyLength == 0) || - ((fragrecptr.p->keyLength != 0) && - (fragrecptr.p->keyLength == tkeylen))); + ndbrequire(fragrecptr.p->keyLength != 0 && + fragrecptr.p->keyLength == tkeylen); - // Read keydata from undo page - for (Uint32 tmp = 0; tmp < tkeylen; tmp++) { - signal->theData[7+tmp] = undopageptr.p->undoword[tmpindex]; - tmpindex = tmpindex + 1; - }//for + // Read localkey1 from undo page + signal->theData[7 + 0] = undopageptr.p->undoword[tmpindex]; + tmpindex = tmpindex + 1; arrGuard((tmpindex - 1), 8192); getElement(signal); if (tgeResult != ZTRUE) { @@ -10597,7 +8899,7 @@ void Dbacc::srDoUndoLab(Signal* signal) default: jam(); - progError(__LINE__, 0, "Invalid pagetype in undo log"); + progError(__LINE__, NDBD_EXIT_SR_UNDOLOG, "Invalid pagetype in undo log"); break; }//switch(tpageType) @@ -10752,43 +9054,25 @@ void Dbacc::execACC_OVER_REC(Signal* signal) ptrCheckGuard(pnoPageidptr, cpagesize, page8); tpnoPageType = pnoPageidptr.p->word32[ZPOS_PAGE_TYPE]; tpnoPageType = (tpnoPageType >> 
ZPOS_PAGE_TYPE_BIT) & 3; - if (tpnoPageType == ZLONG_PAGE_TYPE) { + if (pnoPageidptr.p->word32[ZPOS_ALLOC_CONTAINERS] > ZFREE_LIMIT) { jam(); - // This is to clean the list parameters. - pnoPageidptr.p->word32[ZPOS_PREV_PAGE] = RNIL; - pnoPageidptr.p->word32[ZPOS_NEXT_PAGE] = RNIL; - if (pnoPageidptr.p->word32[ZPOS_ARRAY_POS] != 4) { - jam(); - /*---------------------------------------------------------------------------*/ - /* THE PAGE WAS A LONG PAGE AND IT BELONGED TO A FREE LIST. PUT IT INTO ONE */ - /* OF THE FREE LIST THEN. */ - /*---------------------------------------------------------------------------*/ - // Insert page! - ipaPagePtr = pnoPageidptr; - tipaArrayPos = pnoPageidptr.p->word32[ZPOS_ARRAY_POS]; - insertPageArrayList(signal); - }//if + dbgWord32(pnoPageidptr, ZPOS_OVERFLOWREC, RNIL); + pnoPageidptr.p->word32[ZPOS_OVERFLOWREC] = RNIL; + ndbrequire(pnoPageidptr.p->word32[ZPOS_PAGE_ID] == fragrecptr.p->nextAllocPage); } else { - if (pnoPageidptr.p->word32[ZPOS_ALLOC_CONTAINERS] > ZFREE_LIMIT) { - jam(); - dbgWord32(pnoPageidptr, ZPOS_OVERFLOWREC, RNIL); - pnoPageidptr.p->word32[ZPOS_OVERFLOWREC] = RNIL; - ndbrequire(pnoPageidptr.p->word32[ZPOS_PAGE_ID] == fragrecptr.p->nextAllocPage); - } else { + jam(); + seizeOverRec(signal); + sorOverflowRecPtr.p->dirindex = pnoPageidptr.p->word32[ZPOS_PAGE_ID]; + ndbrequire(sorOverflowRecPtr.p->dirindex == fragrecptr.p->nextAllocPage); + dbgWord32(pnoPageidptr, ZPOS_OVERFLOWREC, sorOverflowRecPtr.i); + pnoPageidptr.p->word32[ZPOS_OVERFLOWREC] = sorOverflowRecPtr.i; + sorOverflowRecPtr.p->overpage = pnoPageidptr.i; + porOverflowRecPtr = sorOverflowRecPtr; + putOverflowRecInFrag(signal); + if (pnoPageidptr.p->word32[ZPOS_ALLOC_CONTAINERS] == 0) { jam(); - seizeOverRec(signal); - sorOverflowRecPtr.p->dirindex = pnoPageidptr.p->word32[ZPOS_PAGE_ID]; - ndbrequire(sorOverflowRecPtr.p->dirindex == fragrecptr.p->nextAllocPage); - dbgWord32(pnoPageidptr, ZPOS_OVERFLOWREC, sorOverflowRecPtr.i); - 
pnoPageidptr.p->word32[ZPOS_OVERFLOWREC] = sorOverflowRecPtr.i; - sorOverflowRecPtr.p->overpage = pnoPageidptr.i; - porOverflowRecPtr = sorOverflowRecPtr; - putOverflowRecInFrag(signal); - if (pnoPageidptr.p->word32[ZPOS_ALLOC_CONTAINERS] == 0) { - jam(); - ropPageptr = pnoPageidptr; - releaseOverpage(signal); - }//if + ropPageptr = pnoPageidptr; + releaseOverpage(signal); }//if }//if }//if @@ -10863,7 +9147,6 @@ void Dbacc::execACC_SCANREQ(Signal* signal) rootfragrecptr.p->scan[i] = scanPtr.i; scanPtr.p->scanBucketState = ScanRec::FIRST_LAP; scanPtr.p->scanLockMode = AccScanReq::getLockMode(tscanFlag); - scanPtr.p->scanKeyinfoFlag = AccScanReq::getKeyinfoFlag(tscanFlag); scanPtr.p->scanReadCommittedFlag = AccScanReq::getReadCommittedFlag(tscanFlag); /* TWELVE BITS OF THE ELEMENT HEAD ARE SCAN */ @@ -11086,7 +9369,7 @@ void Dbacc::checkNextBucketLab(Signal* signal) /* --------------------------------------------------------------------------------- */ if (scanPtr.p->minBucketIndexToRescan != 0) { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); return; }//if scanPtr.p->maxBucketIndexToRescan = fragrecptr.p->p + fragrecptr.p->maxp; @@ -11251,7 +9534,7 @@ void Dbacc::checkNextFragmentLab(Signal* signal) } else { jam(); /* ALL ELEMENTS ARE SENT */ - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); }//if }//if /* --------------------------------------------------------------------------------- */ @@ -11611,11 +9894,6 @@ void Dbacc::initScanOpRec(Signal* signal) Uint32 tisoTmp; Uint32 tisoLocalPtr; Uint32 guard24; - Uint32 tisoPageIndex; - Uint32 tisoPagedir; - DirRangePtr tisoOverflowrangeptr; - DirectoryarrayPtr tisoOverflowDirptr; - Page8Ptr tisoPageptr; scanPtr.p->scanOpsAllocated++; @@ -11644,6 +9922,7 @@ void Dbacc::initScanOpRec(Signal* signal) operationRecPtr.p->elementPointer = tisoElementptr; operationRecPtr.p->elementPage = isoPageptr.i; operationRecPtr.p->isAccLockReq = ZFALSE; + operationRecPtr.p->isUndoLogReq = ZFALSE; 
tisoLocalPtr = tisoElementptr + tisoIsforward; guard24 = fragrecptr.p->localkeylen - 1; for (tisoTmp = 0; tisoTmp <= guard24; tisoTmp++) { @@ -11654,40 +9933,8 @@ void Dbacc::initScanOpRec(Signal* signal) }//for arrGuard(tisoLocalPtr, 2048); operationRecPtr.p->keydata[0] = isoPageptr.p->word32[tisoLocalPtr]; - if (fragrecptr.p->keyLength != 0) { - jam(); - operationRecPtr.p->tupkeylen = fragrecptr.p->keyLength; - guard24 = fragrecptr.p->keyLength - 1; - for (tisoTmp = 0; tisoTmp <= guard24; tisoTmp++) { - arrGuard(tisoTmp, 8); - arrGuard(tisoLocalPtr, 2048); - operationRecPtr.p->keydata[tisoTmp] = isoPageptr.p->word32[tisoLocalPtr]; - tisoLocalPtr = tisoLocalPtr + tisoIsforward; - }//for - } else { - // Long key handling. Put the long key reference in the operation records. - tisoPageIndex = operationRecPtr.p->keydata[0] & 0x3ff; - arrGuard(ZWORDS_IN_PAGE - tisoPageIndex, 2048); - - tisoPagedir = operationRecPtr.p->keydata[0] >> 10; - arrGuard((tisoPagedir >> 8), 256); - - tisoOverflowrangeptr.i = fragrecptr.p->overflowdir; - ptrCheckGuard(tisoOverflowrangeptr, cdirrangesize, dirRange); - - tisoOverflowDirptr.i = tisoOverflowrangeptr.p->dirArray[tisoPagedir >> 8]; - ptrCheckGuard(tisoOverflowDirptr, cdirarraysize, directoryarray); - - tisoPageptr.i = tisoOverflowDirptr.p->pagep[tisoPagedir & 0xff]; - ptrCheckGuard(tisoPageptr, cpagesize, page8); - - operationRecPtr.p->longPagePtr = tisoPageptr.i; - operationRecPtr.p->longKeyPageIndex = tisoPageIndex; - - // Read length of key from page - Uint32 tmp = tisoPageptr.p->word32[ZWORDS_IN_PAGE - tisoPageIndex]; - operationRecPtr.p->tupkeylen = tmp >> 16; - } + operationRecPtr.p->tupkeylen = fragrecptr.p->keyLength; + operationRecPtr.p->xfrmtupkeylen = 0; // not used }//Dbacc::initScanOpRec() /* --------------------------------------------------------------------------------- */ @@ -11882,21 +10129,15 @@ void Dbacc::releaseScanContainer(Signal* signal) Uint32 trscElemlens; Uint32 trscElemlen; - if (trscContainerlen < 5) { 
+ if (trscContainerlen < 4) { if (trscContainerlen != ZCON_HEAD_SIZE) { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); }//if - return; /* 3 IS THE MINIMUM SIZE OF THE ELEMENT */ - }//if - trscElemlens = trscContainerlen - 2; - if (fragrecptr.p->keyLength != 0) { - jam(); - trscElemlen = (1 + fragrecptr.p->keyLength) + fragrecptr.p->localkeylen; /* LENGTH OF THE ELEMENT */ - } else { - jam(); - trscElemlen = (1 + ZACTIVE_LONG_KEY_LEN) + fragrecptr.p->localkeylen; /* LENGTH OF THE ELEMENT */ + return; /* 2 IS THE MINIMUM SIZE OF THE ELEMENT */ }//if + trscElemlens = trscContainerlen - ZCON_HEAD_SIZE; + trscElemlen = fragrecptr.p->elementLength; if (trscIsforward == 1) { jam(); trscElementptr = trscContainerptr + ZCON_HEAD_SIZE; @@ -11923,10 +10164,10 @@ void Dbacc::releaseScanContainer(Signal* signal) }//if trscElemlens = trscElemlens - trscElemlen; trscElementptr = trscElementptr + trscElemStep; - } while (trscElemlens > 2); + } while (trscElemlens > 1); if (trscElemlens != 0) { jam(); - sendSystemerror(signal); + sendSystemerror(signal, __LINE__); }//if }//Dbacc::releaseScanContainer() @@ -11982,19 +10223,12 @@ bool Dbacc::searchScanContainer(Signal* signal) Uint32 tsscElemlen; Uint32 tsscElemStep; - if (tsscContainerlen < 5) { + if (tsscContainerlen < 4) { jam(); - return false; /* 3 IS THE MINIMUM SIZE OF THE ELEMENT */ + return false; /* 2 IS THE MINIMUM SIZE OF THE ELEMENT */ }//if tsscElemlens = tsscContainerlen - ZCON_HEAD_SIZE; - if (fragrecptr.p->keyLength == 0) { - jam(); - tsscElemlen = (ZELEM_HEAD_SIZE + ZACTIVE_LONG_KEY_LEN) + fragrecptr.p->localkeylen; - } else { - jam(); - /* LENGTH OF THE ELEMENT */ - tsscElemlen = (ZELEM_HEAD_SIZE + fragrecptr.p->keyLength) + fragrecptr.p->localkeylen; - }//if + tsscElemlen = fragrecptr.p->elementLength; /* LENGTH OF THE ELEMENT */ if (tsscIsforward == 1) { jam(); @@ -12032,7 +10266,7 @@ bool Dbacc::searchScanContainer(Signal* signal) /* THE ELEMENT IS ALREADY SENT. 
*/ /* SEARCH FOR NEXT ONE */ tsscElemlens = tsscElemlens - tsscElemlen; - if (tsscElemlens > 2) { + if (tsscElemlens > 1) { jam(); tsscElementptr = tsscElementptr + tsscElemStep; goto SCANELEMENTLOOP001; @@ -12047,172 +10281,20 @@ void Dbacc::sendNextScanConf(Signal* signal) { scanPtr.p->scanTimer = scanPtr.p->scanContinuebCounter; Uint32 blockNo = refToBlock(scanPtr.p->scanUserblockref); - if (!scanPtr.p->scanKeyinfoFlag){ - jam(); - /** --------------------------------------------------------------------- - * LQH WILL NOT HAVE ANY USE OF THE TUPLE KEY LENGTH IN THIS CASE AND - * SO WE DO NOT PROVIDE IT. IN THIS CASE THESE VALUES ARE UNDEFINED. - * ---------------------------------------------------------------------- */ - signal->theData[0] = scanPtr.p->scanUserptr; - signal->theData[1] = operationRecPtr.i; - signal->theData[2] = operationRecPtr.p->fid; - signal->theData[3] = operationRecPtr.p->localdata[0]; - signal->theData[4] = operationRecPtr.p->localdata[1]; - signal->theData[5] = fragrecptr.p->localkeylen; - EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 6); - return; - }//if - - if (fragrecptr.p->keyLength != 0) { - jam(); - signal->theData[0] = scanPtr.p->scanUserptr; - signal->theData[1] = operationRecPtr.i; - signal->theData[2] = operationRecPtr.p->fid; - signal->theData[3] = operationRecPtr.p->localdata[0]; - signal->theData[4] = operationRecPtr.p->localdata[1]; - signal->theData[5] = fragrecptr.p->localkeylen; - signal->theData[6] = fragrecptr.p->keyLength; - signal->theData[7] = operationRecPtr.p->keydata[0]; - signal->theData[8] = operationRecPtr.p->keydata[1]; - signal->theData[9] = operationRecPtr.p->keydata[2]; - signal->theData[10] = operationRecPtr.p->keydata[3]; - EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 11); - if (fragrecptr.p->keyLength > ZKEYINKEYREQ) { - jam(); - /* = 4 */ - signal->theData[0] = scanPtr.p->scanUserptr; - signal->theData[1] = operationRecPtr.i; - signal->theData[2] = operationRecPtr.p->fid; - 
signal->theData[3] = fragrecptr.p->keyLength - ZKEYINKEYREQ; - signal->theData[4] = operationRecPtr.p->keydata[4]; - signal->theData[5] = operationRecPtr.p->keydata[5]; - signal->theData[6] = operationRecPtr.p->keydata[6]; - signal->theData[7] = operationRecPtr.p->keydata[7]; - EXECUTE_DIRECT(blockNo, GSN_ACC_SCAN_INFO, signal, 8); - return; - }//if - } else { - jam(); - sendScaninfo(signal); - return; - }//if -}//Dbacc::sendNextScanConf() - -/* --------------------------------------------------------------------------------- */ -/* SEND_SCANINFO */ -/* DESCRIPTION: SCAN AN ELEMENT OF A LONG_KEY_PAGE. */ -/* --------------------------------------------------------------------------------- */ -void Dbacc::sendScaninfo(Signal* signal) -{ - DirRangePtr ssiOverflowrangeptr; - DirectoryarrayPtr ssiOverflowDirptr; - Page8Ptr ssiPageptr; - Uint32 tssiPageIndex; - Uint32 tssiPagedir; - Uint32 tssiKeyLen; - Uint32 tssiStartIndex; - Uint32 tssiIndexValue; - Uint32 tssiTmp; - - Uint32 blockNo = refToBlock(scanPtr.p->scanUserblockref); - - tssiPageIndex = operationRecPtr.p->keydata[0] & 0x3ff; - tssiPagedir = operationRecPtr.p->keydata[0] >> 10; - ssiOverflowrangeptr.i = fragrecptr.p->overflowdir; - ptrCheckGuard(ssiOverflowrangeptr, cdirrangesize, dirRange); - arrGuard((tssiPagedir >> 8), 256); - ssiOverflowDirptr.i = ssiOverflowrangeptr.p->dirArray[tssiPagedir >> 8]; - ptrCheckGuard(ssiOverflowDirptr, cdirarraysize, directoryarray); - ssiPageptr.i = ssiOverflowDirptr.p->pagep[tssiPagedir & 0xff]; - ptrCheckGuard(ssiPageptr, cpagesize, page8); - arrGuard(ZWORDS_IN_PAGE - tssiPageIndex, 2048); - tssiIndexValue = ssiPageptr.p->word32[ZWORDS_IN_PAGE - tssiPageIndex]; - tssiStartIndex = tssiIndexValue & 0xffff; - tssiKeyLen = tssiIndexValue >> 16; + jam(); + /** --------------------------------------------------------------------- + * LQH WILL NOT HAVE ANY USE OF THE TUPLE KEY LENGTH IN THIS CASE AND + * SO WE DO NOT PROVIDE IT. IN THIS CASE THESE VALUES ARE UNDEFINED. 
+ * ---------------------------------------------------------------------- */ signal->theData[0] = scanPtr.p->scanUserptr; signal->theData[1] = operationRecPtr.i; signal->theData[2] = operationRecPtr.p->fid; signal->theData[3] = operationRecPtr.p->localdata[0]; signal->theData[4] = operationRecPtr.p->localdata[1]; signal->theData[5] = fragrecptr.p->localkeylen; - signal->theData[6] = tssiKeyLen; - arrGuard(tssiStartIndex + 3, 2048); - signal->theData[7] = ssiPageptr.p->word32[tssiStartIndex]; - signal->theData[8] = ssiPageptr.p->word32[tssiStartIndex + 1]; - signal->theData[9] = ssiPageptr.p->word32[tssiStartIndex + 2]; - signal->theData[10] = ssiPageptr.p->word32[tssiStartIndex + 3]; - EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 11); - if (tssiKeyLen > 4) { - tssiKeyLen = tssiKeyLen - 4; - tssiStartIndex = tssiStartIndex + 4; - SSI_LOOP_10: - jamEntry(); - if (tssiKeyLen > ZMAXSCANSIGNALLEN) { - jam(); - signal->theData[0] = scanPtr.p->scanUserptr; - signal->theData[1] = operationRecPtr.i; - signal->theData[2] = operationRecPtr.p->fid; - signal->theData[3] = ZMAXSCANSIGNALLEN; - arrGuard(tssiStartIndex + 19, 2048); - signal->theData[4] = ssiPageptr.p->word32[tssiStartIndex]; - signal->theData[5] = ssiPageptr.p->word32[tssiStartIndex + 1]; - signal->theData[6] = ssiPageptr.p->word32[tssiStartIndex + 2]; - signal->theData[7] = ssiPageptr.p->word32[tssiStartIndex + 3]; - signal->theData[8] = ssiPageptr.p->word32[tssiStartIndex + 4]; - signal->theData[9] = ssiPageptr.p->word32[tssiStartIndex + 5]; - signal->theData[10] = ssiPageptr.p->word32[tssiStartIndex + 6]; - signal->theData[11] = ssiPageptr.p->word32[tssiStartIndex + 7]; - signal->theData[12] = ssiPageptr.p->word32[tssiStartIndex + 8]; - signal->theData[13] = ssiPageptr.p->word32[tssiStartIndex + 9]; - signal->theData[14] = ssiPageptr.p->word32[tssiStartIndex + 10]; - signal->theData[15] = ssiPageptr.p->word32[tssiStartIndex + 11]; - signal->theData[16] = ssiPageptr.p->word32[tssiStartIndex + 12]; - 
signal->theData[17] = ssiPageptr.p->word32[tssiStartIndex + 13]; - signal->theData[18] = ssiPageptr.p->word32[tssiStartIndex + 14]; - signal->theData[19] = ssiPageptr.p->word32[tssiStartIndex + 15]; - signal->theData[20] = ssiPageptr.p->word32[tssiStartIndex + 16]; - signal->theData[21] = ssiPageptr.p->word32[tssiStartIndex + 17]; - signal->theData[22] = ssiPageptr.p->word32[tssiStartIndex + 18]; - signal->theData[23] = ssiPageptr.p->word32[tssiStartIndex + 19]; - EXECUTE_DIRECT(blockNo, GSN_ACC_SCAN_INFO24, signal, 24); - tssiStartIndex = tssiStartIndex + ZMAXSCANSIGNALLEN; - tssiKeyLen = tssiKeyLen - ZMAXSCANSIGNALLEN; - goto SSI_LOOP_10; - } else { - jam(); - ndbrequire((tssiStartIndex + tssiKeyLen) <= 2048); - for (tssiTmp = 0; tssiTmp < tssiKeyLen; tssiTmp++) { - ckeys[tssiTmp] = ssiPageptr.p->word32[tssiStartIndex + tssiTmp]; - }//for - signal->theData[0] = scanPtr.p->scanUserptr; - signal->theData[1] = operationRecPtr.i; - signal->theData[2] = operationRecPtr.p->fid; - /* LOCAL FRAGMENT IDENTITY */ - signal->theData[3] = tssiKeyLen; - signal->theData[4] = ckeys[0]; - signal->theData[5] = ckeys[1]; - signal->theData[6] = ckeys[2]; - signal->theData[7] = ckeys[3]; - signal->theData[8] = ckeys[4]; - signal->theData[9] = ckeys[5]; - signal->theData[10] = ckeys[6]; - signal->theData[11] = ckeys[7]; - signal->theData[12] = ckeys[8]; - signal->theData[13] = ckeys[9]; - signal->theData[14] = ckeys[10]; - signal->theData[15] = ckeys[11]; - signal->theData[16] = ckeys[12]; - signal->theData[17] = ckeys[13]; - signal->theData[18] = ckeys[14]; - signal->theData[19] = ckeys[15]; - signal->theData[20] = ckeys[16]; - signal->theData[21] = ckeys[17]; - signal->theData[22] = ckeys[18]; - signal->theData[23] = ckeys[19]; - EXECUTE_DIRECT(blockNo, GSN_ACC_SCAN_INFO24, signal, 24); - }//if - }//if -}//Dbacc::sendScaninfo() + EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 6); + return; +}//Dbacc::sendNextScanConf() 
/*--------------------------------------------------------------------------- * sendScanHbRep @@ -13202,9 +11284,9 @@ void Dbacc::seizeSrVerRec(Signal* signal) /* --------------------------------------------------------------------------------- */ /* SEND_SYSTEMERROR */ /* --------------------------------------------------------------------------------- */ -void Dbacc::sendSystemerror(Signal* signal) +void Dbacc::sendSystemerror(Signal* signal, int line) { - progError(0, 0); + progError(line, NDBD_EXIT_PRGERR); }//Dbacc::sendSystemerror() /* --------------------------------------------------------------------------------- */ @@ -13265,7 +11347,7 @@ void Dbacc::takeRecOutOfFreeOverpage(Signal* signal) void Dbacc::reportMemoryUsage(Signal* signal, int gth){ - signal->theData[0] = EventReport::MemoryUsage; + signal->theData[0] = NDB_LE_MemoryUsage; signal->theData[1] = gth; signal->theData[2] = sizeof(* rpPageptr.p); signal->theData[3] = cnoOfAllocatedPages; @@ -13315,13 +11397,12 @@ Dbacc::execDUMP_STATE_ORD(Signal* signal) scanPtr.p->minBucketIndexToRescan, scanPtr.p->maxBucketIndexToRescan); infoEvent(" scanBucketState=%d, scanLockHeld=%d, userBlockRef=%d, " - "scanMask=%d scanLockMode=%d, keyInfoFlag=%d", + "scanMask=%d scanLockMode=%d", scanPtr.p->scanBucketState, scanPtr.p->scanLockHeld, scanPtr.p->scanUserblockref, scanPtr.p->scanMask, - scanPtr.p->scanLockMode, - scanPtr.p->scanKeyinfoFlag); + scanPtr.p->scanLockMode); return; } diff --git a/ndb/src/kernel/blocks/dbacc/Makefile.am b/ndb/src/kernel/blocks/dbacc/Makefile.am index e44524c3edd..ca1b1efac37 100644 --- a/ndb/src/kernel/blocks/dbacc/Makefile.am +++ b/ndb/src/kernel/blocks/dbacc/Makefile.am @@ -3,6 +3,8 @@ noinst_LIBRARIES = libdbacc.a libdbacc_a_SOURCES = DbaccInit.cpp DbaccMain.cpp +INCLUDES_LOC = -I$(top_srcdir)/ndb/src/kernel/blocks/dbtup + include $(top_srcdir)/ndb/config/common.mk.am include $(top_srcdir)/ndb/config/type_kernel.mk.am diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp 
b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index 2bb429aeabc..efd519339f7 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -27,6 +27,7 @@ #include <SectionReader.hpp> #include <SimpleProperties.hpp> #include <AttributeHeader.hpp> +#include <KeyDescriptor.hpp> #include <signaldata/DictSchemaInfo.hpp> #include <signaldata/DictTabInfo.hpp> #include <signaldata/DropTabFile.hpp> @@ -202,6 +203,11 @@ void Dbdict::execCONTINUEB(Signal* signal) sendGetTabResponse(signal); break; + case ZDICT_LOCK_POLL: + jam(); + checkDictLockQueue(signal, true); + break; + default : ndbrequire(false); break; @@ -228,7 +234,7 @@ void Dbdict::packTableIntoPages(Signal* signal, Uint32 tableId, Uint32 pageId) 8 * ZSIZE_OF_PAGES_IN_WORDS); w.first(); - packTableIntoPagesImpl(w, tablePtr); + packTableIntoPagesImpl(w, tablePtr, signal); Uint32 wordsOfTable = w.getWordsUsed(); Uint32 pagesUsed = @@ -257,7 +263,8 @@ void Dbdict::packTableIntoPages(Signal* signal, Uint32 tableId, Uint32 pageId) void Dbdict::packTableIntoPagesImpl(SimpleProperties::Writer & w, - TableRecordPtr tablePtr){ + TableRecordPtr tablePtr, + Signal* signal){ w.add(DictTabInfo::TableName, tablePtr.p->tableName); w.add(DictTabInfo::TableId, tablePtr.i); @@ -278,9 +285,36 @@ Dbdict::packTableIntoPagesImpl(SimpleProperties::Writer & w, w.add(DictTabInfo::MaxLoadFactor, tablePtr.p->maxLoadFactor); w.add(DictTabInfo::TableKValue, tablePtr.p->kValue); w.add(DictTabInfo::FragmentTypeVal, tablePtr.p->fragmentType); - w.add(DictTabInfo::FragmentKeyTypeVal, tablePtr.p->fragmentKeyType); w.add(DictTabInfo::TableTypeVal, tablePtr.p->tableType); - w.add(DictTabInfo::FragmentCount, tablePtr.p->fragmentCount); + w.add(DictTabInfo::MaxRowsLow, tablePtr.p->maxRowsLow); + w.add(DictTabInfo::MaxRowsHigh, tablePtr.p->maxRowsHigh); + w.add(DictTabInfo::MinRowsLow, tablePtr.p->minRowsLow); + w.add(DictTabInfo::MinRowsHigh, tablePtr.p->minRowsHigh); + + if(!signal) + { + 
w.add(DictTabInfo::FragmentCount, tablePtr.p->fragmentCount); + } + else + { + Uint32 * theData = signal->getDataPtrSend(); + CreateFragmentationReq * const req = (CreateFragmentationReq*)theData; + req->senderRef = 0; + req->senderData = RNIL; + req->fragmentationType = tablePtr.p->fragmentType; + req->noOfFragments = 0; + req->fragmentNode = 0; + req->primaryTableId = tablePtr.i; + EXECUTE_DIRECT(DBDIH, GSN_CREATE_FRAGMENTATION_REQ, signal, + CreateFragmentationReq::SignalLength); + if(signal->theData[0] == 0) + { + Uint16 *data = (Uint16*)&signal->theData[25]; + Uint32 count = 2 + data[0] * data[1]; + w.add(DictTabInfo::FragmentDataLen, 2*count); + w.add(DictTabInfo::FragmentData, data, 2*count); + } + } if (tablePtr.p->primaryTableId != RNIL){ TableRecordPtr primTab; @@ -311,18 +345,14 @@ Dbdict::packTableIntoPagesImpl(SimpleProperties::Writer & w, const Uint32 attrSize = AttributeDescriptor::getSize(desc); const Uint32 arraySize = AttributeDescriptor::getArraySize(desc); const Uint32 nullable = AttributeDescriptor::getNullable(desc); - const Uint32 DGroup = AttributeDescriptor::getDGroup(desc); const Uint32 DKey = AttributeDescriptor::getDKey(desc); - const Uint32 attrStoredInd = AttributeDescriptor::getStoredInTup(desc); - w.add(DictTabInfo::AttributeType, attrType); + // AttributeType deprecated w.add(DictTabInfo::AttributeSize, attrSize); w.add(DictTabInfo::AttributeArraySize, arraySize); w.add(DictTabInfo::AttributeNullableFlag, nullable); - w.add(DictTabInfo::AttributeDGroup, DGroup); w.add(DictTabInfo::AttributeDKey, DKey); - w.add(DictTabInfo::AttributeStoredInd, attrStoredInd); - w.add(DictTabInfo::AttributeExtType, attrPtr.p->extType); + w.add(DictTabInfo::AttributeExtType, attrType); w.add(DictTabInfo::AttributeExtPrecision, attrPtr.p->extPrecision); w.add(DictTabInfo::AttributeExtScale, attrPtr.p->extScale); w.add(DictTabInfo::AttributeExtLength, attrPtr.p->extLength); @@ -370,7 +400,7 @@ void Dbdict::execFSCLOSECONF(Signal* signal) 
closeWriteTableConf(signal, fsPtr); break; case FsConnectRecord::OPEN_READ_SCHEMA2: - openSchemaFile(signal, 1, fsPtr.i, false); + openSchemaFile(signal, 1, fsPtr.i, false, false); break; default: jamLine((fsPtr.p->fsState & 0xFFF)); @@ -631,7 +661,7 @@ void Dbdict::writeTableFile(Signal* signal, Uint32 filePtr, Uint32 fsConPtr) FsReadWriteReq::setSyncFlag(fsRWReq->operationFlag, 1); FsReadWriteReq::setFormatFlag(fsRWReq->operationFlag, FsReadWriteReq::fsFormatArrayOfPages); - fsRWReq->varIndex = ZALLOCATE; + fsRWReq->varIndex = ZBAT_TABLE_FILE; fsRWReq->numberOfPages = c_writeTableRecord.noOfPages; fsRWReq->data.arrayOfPages.varIndex = c_writeTableRecord.pageId; fsRWReq->data.arrayOfPages.fileOffset = 0; // Write to file page 0 @@ -708,7 +738,7 @@ void Dbdict::readTableFile(Signal* signal, Uint32 filePtr, Uint32 fsConPtr) FsReadWriteReq::setSyncFlag(fsRWReq->operationFlag, 0); FsReadWriteReq::setFormatFlag(fsRWReq->operationFlag, FsReadWriteReq::fsFormatArrayOfPages); - fsRWReq->varIndex = ZALLOCATE; + fsRWReq->varIndex = ZBAT_TABLE_FILE; fsRWReq->numberOfPages = c_readTableRecord.noOfPages; fsRWReq->data.arrayOfPages.varIndex = c_readTableRecord.pageId; fsRWReq->data.arrayOfPages.fileOffset = 0; // Write to file page 0 @@ -774,11 +804,9 @@ Dbdict::updateSchemaState(Signal* signal, Uint32 tableId, SchemaFile::TableEntry* te, Callback* callback){ jam(); - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - ndbrequire(tableId < c_tableRecordPool.getSize()); - SchemaFile::TableEntry * tableEntry = getTableEntry(pagePtr.p, tableId); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + SchemaFile::TableEntry * tableEntry = getTableEntry(xsf, tableId); SchemaFile::TableState newState = (SchemaFile::TableState)te->m_tableState; @@ -825,12 +853,15 @@ Dbdict::updateSchemaState(Signal* signal, Uint32 tableId, ndbrequire(ok); * tableEntry = * te; - computeChecksum((SchemaFile*)pagePtr.p); + computeChecksum(xsf, tableId / 
NDB_SF_PAGE_ENTRIES); ndbrequire(c_writeSchemaRecord.inUse == false); c_writeSchemaRecord.inUse = true; c_writeSchemaRecord.pageId = c_schemaRecord.schemaPage; + c_writeSchemaRecord.newFile = false; + c_writeSchemaRecord.firstPage = tableId / NDB_SF_PAGE_ENTRIES; + c_writeSchemaRecord.noOfPages = 1; c_writeSchemaRecord.m_callback = * callback; startWriteSchemaFile(signal); @@ -841,14 +872,15 @@ void Dbdict::startWriteSchemaFile(Signal* signal) FsConnectRecordPtr fsPtr; c_fsConnectRecordPool.getPtr(fsPtr, getFsConnRecord()); fsPtr.p->fsState = FsConnectRecord::OPEN_WRITE_SCHEMA; - openSchemaFile(signal, 0, fsPtr.i, true); + openSchemaFile(signal, 0, fsPtr.i, true, c_writeSchemaRecord.newFile); c_writeSchemaRecord.noOfSchemaFilesHandled = 0; }//Dbdict::startWriteSchemaFile() void Dbdict::openSchemaFile(Signal* signal, Uint32 fileNo, Uint32 fsConPtr, - bool writeFlag) + bool writeFlag, + bool newFile) { FsOpenReq * const fsOpenReq = (FsOpenReq *)&signal->theData[0]; fsOpenReq->userReference = reference(); @@ -857,9 +889,11 @@ void Dbdict::openSchemaFile(Signal* signal, jam(); fsOpenReq->fileFlags = FsOpenReq::OM_WRITEONLY | - FsOpenReq::OM_TRUNCATE | - FsOpenReq::OM_CREATE | FsOpenReq::OM_SYNC; + if (newFile) + fsOpenReq->fileFlags |= + FsOpenReq::OM_TRUNCATE | + FsOpenReq::OM_CREATE; } else { jam(); fsOpenReq->fileFlags = FsOpenReq::OM_READONLY; @@ -884,6 +918,12 @@ void Dbdict::writeSchemaFile(Signal* signal, Uint32 filePtr, Uint32 fsConPtr) { FsReadWriteReq * const fsRWReq = (FsReadWriteReq *)&signal->theData[0]; + // check write record + WriteSchemaRecord & wr = c_writeSchemaRecord; + ndbrequire(wr.pageId == (wr.pageId != 0) * NDB_SF_MAX_PAGES); + ndbrequire(wr.noOfPages != 0); + ndbrequire(wr.firstPage + wr.noOfPages <= NDB_SF_MAX_PAGES); + fsRWReq->filePointer = filePtr; fsRWReq->userReference = reference(); fsRWReq->userPointer = fsConPtr; @@ -891,11 +931,11 @@ void Dbdict::writeSchemaFile(Signal* signal, Uint32 filePtr, Uint32 fsConPtr) 
FsReadWriteReq::setSyncFlag(fsRWReq->operationFlag, 1); FsReadWriteReq::setFormatFlag(fsRWReq->operationFlag, FsReadWriteReq::fsFormatArrayOfPages); - fsRWReq->varIndex = ZALLOCATE; - fsRWReq->numberOfPages = 1; -// Write from memory page - fsRWReq->data.arrayOfPages.varIndex = c_writeSchemaRecord.pageId; - fsRWReq->data.arrayOfPages.fileOffset = 0; // Write to file page 0 + fsRWReq->varIndex = ZBAT_SCHEMA_FILE; + fsRWReq->numberOfPages = wr.noOfPages; + // Write from memory page + fsRWReq->data.arrayOfPages.varIndex = wr.pageId + wr.firstPage; + fsRWReq->data.arrayOfPages.fileOffset = wr.firstPage; sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA); }//writeSchemaFile() @@ -925,7 +965,7 @@ void Dbdict::closeWriteSchemaConf(Signal* signal, if (c_writeSchemaRecord.noOfSchemaFilesHandled < 2) { jam(); fsPtr.p->fsState = FsConnectRecord::OPEN_WRITE_SCHEMA; - openSchemaFile(signal, 1, fsPtr.i, true); + openSchemaFile(signal, 1, fsPtr.i, true, c_writeSchemaRecord.newFile); return; } ndbrequire(c_writeSchemaRecord.noOfSchemaFilesHandled == 2); @@ -943,20 +983,26 @@ void Dbdict::startReadSchemaFile(Signal* signal) FsConnectRecordPtr fsPtr; c_fsConnectRecordPool.getPtr(fsPtr, getFsConnRecord()); fsPtr.p->fsState = FsConnectRecord::OPEN_READ_SCHEMA1; - openSchemaFile(signal, 0, fsPtr.i, false); + openSchemaFile(signal, 0, fsPtr.i, false, false); }//Dbdict::startReadSchemaFile() void Dbdict::openReadSchemaRef(Signal* signal, FsConnectRecordPtr fsPtr) { fsPtr.p->fsState = FsConnectRecord::OPEN_READ_SCHEMA2; - openSchemaFile(signal, 1, fsPtr.i, false); + openSchemaFile(signal, 1, fsPtr.i, false, false); }//Dbdict::openReadSchemaRef() void Dbdict::readSchemaFile(Signal* signal, Uint32 filePtr, Uint32 fsConPtr) { FsReadWriteReq * const fsRWReq = (FsReadWriteReq *)&signal->theData[0]; + // check read record + ReadSchemaRecord & rr = c_readSchemaRecord; + ndbrequire(rr.pageId == (rr.pageId != 0) * NDB_SF_MAX_PAGES); + ndbrequire(rr.noOfPages != 0); + ndbrequire(rr.firstPage + 
rr.noOfPages <= NDB_SF_MAX_PAGES); + fsRWReq->filePointer = filePtr; fsRWReq->userReference = reference(); fsRWReq->userPointer = fsConPtr; @@ -964,10 +1010,10 @@ void Dbdict::readSchemaFile(Signal* signal, Uint32 filePtr, Uint32 fsConPtr) FsReadWriteReq::setSyncFlag(fsRWReq->operationFlag, 0); FsReadWriteReq::setFormatFlag(fsRWReq->operationFlag, FsReadWriteReq::fsFormatArrayOfPages); - fsRWReq->varIndex = ZALLOCATE; - fsRWReq->numberOfPages = 1; - fsRWReq->data.arrayOfPages.varIndex = c_readSchemaRecord.pageId; - fsRWReq->data.arrayOfPages.fileOffset = 0; + fsRWReq->varIndex = ZBAT_SCHEMA_FILE; + fsRWReq->numberOfPages = rr.noOfPages; + fsRWReq->data.arrayOfPages.varIndex = rr.pageId + rr.firstPage; + fsRWReq->data.arrayOfPages.fileOffset = rr.firstPage; sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 8, JBA); }//readSchemaFile() @@ -985,20 +1031,61 @@ void Dbdict::readSchemaConf(Signal* signal, jam(); crashInd = true; }//if - PageRecordPtr tmpPagePtr; - c_pageRecordArray.getPtr(tmpPagePtr, c_readSchemaRecord.pageId); - Uint32 sz = ZSIZE_OF_PAGES_IN_WORDS; - Uint32 chk = computeChecksum((const Uint32*)tmpPagePtr.p, sz); + ReadSchemaRecord & rr = c_readSchemaRecord; + XSchemaFile * xsf = &c_schemaFile[rr.pageId != 0]; - ndbrequire((chk == 0) || !crashInd); + if (rr.schemaReadState == ReadSchemaRecord::INITIAL_READ_HEAD) { + jam(); + ndbrequire(rr.firstPage == 0); + SchemaFile * sf = &xsf->schemaPage[0]; + Uint32 noOfPages; + if (sf->NdbVersion < NDB_SF_VERSION_5_0_6) { + jam(); + const Uint32 pageSize_old = 32 * 1024; + noOfPages = pageSize_old / NDB_SF_PAGE_SIZE - 1; + } else { + noOfPages = sf->FileSize / NDB_SF_PAGE_SIZE - 1; + } + rr.schemaReadState = ReadSchemaRecord::INITIAL_READ; + if (noOfPages != 0) { + rr.firstPage = 1; + rr.noOfPages = noOfPages; + readSchemaFile(signal, fsPtr.p->filePtr, fsPtr.i); + return; + } + } + + SchemaFile * sf0 = &xsf->schemaPage[0]; + xsf->noOfPages = sf0->FileSize / NDB_SF_PAGE_SIZE; - if (chk != 0){ + if (sf0->NdbVersion < 
NDB_SF_VERSION_5_0_6 && + ! convertSchemaFileTo_5_0_6(xsf)) { jam(); + ndbrequire(! crashInd); ndbrequire(fsPtr.p->fsState == FsConnectRecord::READ_SCHEMA1); readSchemaRef(signal, fsPtr); return; - }//if + } + + for (Uint32 n = 0; n < xsf->noOfPages; n++) { + SchemaFile * sf = &xsf->schemaPage[n]; + bool ok = + memcmp(sf->Magic, NDB_SF_MAGIC, sizeof(sf->Magic)) == 0 && + sf->FileSize != 0 && + sf->FileSize % NDB_SF_PAGE_SIZE == 0 && + sf->FileSize == sf0->FileSize && + sf->PageNumber == n && + computeChecksum((Uint32*)sf, NDB_SF_PAGE_SIZE_IN_WORDS) == 0; + ndbrequire(ok || !crashInd); + if (! ok) { + jam(); + ndbrequire(fsPtr.p->fsState == FsConnectRecord::READ_SCHEMA1); + readSchemaRef(signal, fsPtr); + return; + } + } + fsPtr.p->fsState = FsConnectRecord::CLOSE_READ_SCHEMA; closeFile(signal, fsPtr.p->filePtr, fsPtr.i); return; @@ -1025,7 +1112,27 @@ void Dbdict::closeReadSchemaConf(Signal* signal, switch(state) { case ReadSchemaRecord::INITIAL_READ : jam(); - sendNDB_STTORRY(signal); + { + // write back both copies + + ndbrequire(c_writeSchemaRecord.inUse == false); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.oldSchemaPage != 0 ]; + Uint32 noOfPages = + (c_tableRecordPool.getSize() + NDB_SF_PAGE_ENTRIES - 1) / + NDB_SF_PAGE_ENTRIES; + resizeSchemaFile(xsf, noOfPages); + + c_writeSchemaRecord.inUse = true; + c_writeSchemaRecord.pageId = c_schemaRecord.oldSchemaPage; + c_writeSchemaRecord.newFile = true; + c_writeSchemaRecord.firstPage = 0; + c_writeSchemaRecord.noOfPages = xsf->noOfPages; + + c_writeSchemaRecord.m_callback.m_callbackFunction = + safe_cast(&Dbdict::initSchemaFile_conf); + + startWriteSchemaFile(signal); + } break; default : @@ -1035,6 +1142,54 @@ void Dbdict::closeReadSchemaConf(Signal* signal, }//switch }//Dbdict::closeReadSchemaConf() +bool +Dbdict::convertSchemaFileTo_5_0_6(XSchemaFile * xsf) +{ + const Uint32 pageSize_old = 32 * 1024; + Uint32 page_old[pageSize_old >> 2]; + SchemaFile * sf_old = (SchemaFile *)page_old; + + if 
(xsf->noOfPages * NDB_SF_PAGE_SIZE != pageSize_old) + return false; + SchemaFile * sf0 = &xsf->schemaPage[0]; + memcpy(sf_old, sf0, pageSize_old); + + // init max number new pages needed + xsf->noOfPages = (sf_old->NoOfTableEntries + NDB_SF_PAGE_ENTRIES - 1) / + NDB_SF_PAGE_ENTRIES; + initSchemaFile(xsf, 0, xsf->noOfPages, true); + + Uint32 noOfPages = 1; + Uint32 n, i, j; + for (n = 0; n < xsf->noOfPages; n++) { + jam(); + for (i = 0; i < NDB_SF_PAGE_ENTRIES; i++) { + j = n * NDB_SF_PAGE_ENTRIES + i; + if (j >= sf_old->NoOfTableEntries) + continue; + const SchemaFile::TableEntry_old & te_old = sf_old->TableEntries_old[j]; + if (te_old.m_tableState == SchemaFile::INIT || + te_old.m_tableState == SchemaFile::DROP_TABLE_COMMITTED || + te_old.m_noOfPages == 0) + continue; + SchemaFile * sf = &xsf->schemaPage[n]; + SchemaFile::TableEntry & te = sf->TableEntries[i]; + te.m_tableState = te_old.m_tableState; + te.m_tableVersion = te_old.m_tableVersion; + te.m_tableType = te_old.m_tableType; + te.m_info_words = te_old.m_noOfPages * ZSIZE_OF_PAGES_IN_WORDS - + ZPAGE_HEADER_SIZE; + te.m_gcp = te_old.m_gcp; + if (noOfPages < n) + noOfPages = n; + } + } + xsf->noOfPages = noOfPages; + initSchemaFile(xsf, 0, xsf->noOfPages, false); + + return true; +} + /* **************************************************************** */ /* ---------------------------------------------------------------- */ /* MODULE: INITIALISATION MODULE ------------------------- */ @@ -1055,14 +1210,12 @@ Dbdict::Dbdict(const class Configuration & conf): c_opDropIndex(c_opRecordPool), c_opAlterIndex(c_opRecordPool), c_opBuildIndex(c_opRecordPool), - c_opCreateEvent(c_opRecordPool), - c_opSubEvent(c_opRecordPool), - c_opDropEvent(c_opRecordPool), - c_opSignalUtil(c_opRecordPool), c_opCreateTrigger(c_opRecordPool), c_opDropTrigger(c_opRecordPool), c_opAlterTrigger(c_opRecordPool), - c_opRecordSequence(0) + c_opRecordSequence(0), + c_dictLockQueue(c_dictLockPool), + c_dictLockPoll(false) { 
BLOCK_CONSTRUCTOR(Dbdict); @@ -1116,44 +1269,6 @@ Dbdict::Dbdict(const class Configuration & conf): addRecSignal(GSN_BUILDINDXCONF, &Dbdict::execBUILDINDXCONF); addRecSignal(GSN_BUILDINDXREF, &Dbdict::execBUILDINDXREF); - // Util signals - addRecSignal(GSN_UTIL_PREPARE_CONF, &Dbdict::execUTIL_PREPARE_CONF); - addRecSignal(GSN_UTIL_PREPARE_REF, &Dbdict::execUTIL_PREPARE_REF); - - addRecSignal(GSN_UTIL_EXECUTE_CONF, &Dbdict::execUTIL_EXECUTE_CONF); - addRecSignal(GSN_UTIL_EXECUTE_REF, &Dbdict::execUTIL_EXECUTE_REF); - - addRecSignal(GSN_UTIL_RELEASE_CONF, &Dbdict::execUTIL_RELEASE_CONF); - addRecSignal(GSN_UTIL_RELEASE_REF, &Dbdict::execUTIL_RELEASE_REF); - - // Event signals - addRecSignal(GSN_CREATE_EVNT_REQ, &Dbdict::execCREATE_EVNT_REQ); - addRecSignal(GSN_CREATE_EVNT_CONF, &Dbdict::execCREATE_EVNT_CONF); - addRecSignal(GSN_CREATE_EVNT_REF, &Dbdict::execCREATE_EVNT_REF); - - addRecSignal(GSN_CREATE_SUBID_CONF, &Dbdict::execCREATE_SUBID_CONF); - addRecSignal(GSN_CREATE_SUBID_REF, &Dbdict::execCREATE_SUBID_REF); - - addRecSignal(GSN_SUB_CREATE_CONF, &Dbdict::execSUB_CREATE_CONF); - addRecSignal(GSN_SUB_CREATE_REF, &Dbdict::execSUB_CREATE_REF); - - addRecSignal(GSN_SUB_START_REQ, &Dbdict::execSUB_START_REQ); - addRecSignal(GSN_SUB_START_CONF, &Dbdict::execSUB_START_CONF); - addRecSignal(GSN_SUB_START_REF, &Dbdict::execSUB_START_REF); - - addRecSignal(GSN_SUB_STOP_REQ, &Dbdict::execSUB_STOP_REQ); - addRecSignal(GSN_SUB_STOP_CONF, &Dbdict::execSUB_STOP_CONF); - addRecSignal(GSN_SUB_STOP_REF, &Dbdict::execSUB_STOP_REF); - - addRecSignal(GSN_SUB_SYNC_CONF, &Dbdict::execSUB_SYNC_CONF); - addRecSignal(GSN_SUB_SYNC_REF, &Dbdict::execSUB_SYNC_REF); - - addRecSignal(GSN_DROP_EVNT_REQ, &Dbdict::execDROP_EVNT_REQ); - - addRecSignal(GSN_SUB_REMOVE_REQ, &Dbdict::execSUB_REMOVE_REQ); - addRecSignal(GSN_SUB_REMOVE_CONF, &Dbdict::execSUB_REMOVE_CONF); - addRecSignal(GSN_SUB_REMOVE_REF, &Dbdict::execSUB_REMOVE_REF); - // Trigger signals addRecSignal(GSN_CREATE_TRIG_REQ, 
&Dbdict::execCREATE_TRIG_REQ); addRecSignal(GSN_CREATE_TRIG_CONF, &Dbdict::execCREATE_TRIG_CONF); @@ -1206,6 +1321,9 @@ Dbdict::Dbdict(const class Configuration & conf): addRecSignal(GSN_DROP_TAB_CONF, &Dbdict::execDROP_TAB_CONF); addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Dbdict::execBACKUP_FRAGMENT_REQ); + + addRecSignal(GSN_DICT_LOCK_REQ, &Dbdict::execDICT_LOCK_REQ); + addRecSignal(GSN_DICT_UNLOCK_ORD, &Dbdict::execDICT_UNLOCK_ORD); }//Dbdict::Dbdict() Dbdict::~Dbdict() @@ -1306,6 +1424,7 @@ void Dbdict::initRetrieveRecord(Signal* signal, Uint32 i, Uint32 returnCode) void Dbdict::initSchemaRecord() { c_schemaRecord.schemaPage = RNIL; + c_schemaRecord.oldSchemaPage = RNIL; }//Dbdict::initSchemaRecord() void Dbdict::initRestartRecord() @@ -1327,10 +1446,10 @@ void Dbdict::initNodeRecords() void Dbdict::initPageRecords() { - c_schemaRecord.schemaPage = ZMAX_PAGES_OF_TABLE_DEFINITION; - c_schemaRecord.oldSchemaPage = ZMAX_PAGES_OF_TABLE_DEFINITION + 1; - c_retrieveRecord.retrievePage = ZMAX_PAGES_OF_TABLE_DEFINITION + 2; - ndbrequire(ZNUMBER_OF_PAGES >= (2 * ZMAX_PAGES_OF_TABLE_DEFINITION + 2)); + c_retrieveRecord.retrievePage = ZMAX_PAGES_OF_TABLE_DEFINITION; + ndbrequire(ZNUMBER_OF_PAGES >= (ZMAX_PAGES_OF_TABLE_DEFINITION + 1)); + c_schemaRecord.schemaPage = 0; + c_schemaRecord.oldSchemaPage = NDB_SF_MAX_PAGES; }//Dbdict::initPageRecords() void Dbdict::initTableRecords() @@ -1360,9 +1479,7 @@ void Dbdict::initialiseTableRecord(TableRecordPtr tablePtr) tablePtr.p->tableVersion = (Uint32)-1; tablePtr.p->tabState = TableRecord::NOT_DEFINED; tablePtr.p->tabReturnState = TableRecord::TRS_IDLE; - tablePtr.p->storageType = DictTabInfo::MainMemory; tablePtr.p->fragmentType = DictTabInfo::AllNodesSmallTable; - tablePtr.p->fragmentKeyType = DictTabInfo::PrimaryKey; memset(tablePtr.p->tableName, 0, sizeof(tablePtr.p->tableName)); tablePtr.p->gciTableCreated = 0; tablePtr.p->noOfAttributes = ZNIL; @@ -1380,6 +1497,10 @@ void Dbdict::initialiseTableRecord(TableRecordPtr tablePtr) 
tablePtr.p->minLoadFactor = 70; tablePtr.p->noOfPrimkey = 1; tablePtr.p->tupKeyLength = 1; + tablePtr.p->maxRowsLow = 0; + tablePtr.p->maxRowsHigh = 0; + tablePtr.p->minRowsLow = 0; + tablePtr.p->minRowsHigh = 0; tablePtr.p->storedTable = true; tablePtr.p->tableType = DictTabInfo::UserTable; tablePtr.p->primaryTableId = RNIL; @@ -1599,18 +1720,16 @@ void Dbdict::execREAD_CONFIG_REQ(Signal* signal) c_fsConnectRecordPool.setSize(ZFS_CONNECT_SIZE); c_nodes.setSize(MAX_NODES); c_pageRecordArray.setSize(ZNUMBER_OF_PAGES); + c_schemaPageRecordArray.setSize(2 * NDB_SF_MAX_PAGES); c_tableRecordPool.setSize(tablerecSize); c_tableRecordHash.setSize(tablerecSize); + g_key_descriptor_pool.setSize(tablerecSize); c_triggerRecordPool.setSize(c_maxNoOfTriggers); c_triggerRecordHash.setSize(c_maxNoOfTriggers); c_opRecordPool.setSize(256); // XXX need config params c_opCreateTable.setSize(8); c_opDropTable.setSize(8); c_opCreateIndex.setSize(8); - c_opCreateEvent.setSize(8); - c_opSubEvent.setSize(8); - c_opDropEvent.setSize(8); - c_opSignalUtil.setSize(8); c_opDropIndex.setSize(8); c_opAlterIndex.setSize(8); c_opBuildIndex.setSize(8); @@ -1618,11 +1737,24 @@ void Dbdict::execREAD_CONFIG_REQ(Signal* signal) c_opDropTrigger.setSize(8); c_opAlterTrigger.setSize(8); + c_dictLockPool.setSize(32); + + // Initialize schema file copies + c_schemaFile[0].schemaPage = + (SchemaFile*)c_schemaPageRecordArray.getPtr(0 * NDB_SF_MAX_PAGES); + c_schemaFile[0].noOfPages = 0; + c_schemaFile[1].schemaPage = + (SchemaFile*)c_schemaPageRecordArray.getPtr(1 * NDB_SF_MAX_PAGES); + c_schemaFile[1].noOfPages = 0; + // Initialize BAT for interface to file system - PageRecordPtr pageRecPtr; - c_pageRecordArray.getPtr(pageRecPtr, 0); NewVARIABLE* bat = allocateBat(2); - bat[1].WA = &pageRecPtr.p->word[0]; + bat[0].WA = &c_schemaPageRecordArray.getPtr(0)->word[0]; + bat[0].nrr = 2 * NDB_SF_MAX_PAGES; + bat[0].ClusterSize = NDB_SF_PAGE_SIZE; + bat[0].bits.q = NDB_SF_PAGE_SIZE_IN_WORDS_LOG2; + bat[0].bits.v = 5; 
// 32 bits per element + bat[1].WA = &c_pageRecordArray.getPtr(0)->word[0]; bat[1].nrr = ZNUMBER_OF_PAGES; bat[1].ClusterSize = ZSIZE_OF_PAGES_IN_WORDS * 4; bat[1].bits.q = ZLOG_SIZE_OF_PAGES_IN_WORDS; // 2**13 = 8192 elements @@ -1767,16 +1899,23 @@ void Dbdict::execHOT_SPAREREP(Signal* signal) void Dbdict::initSchemaFile(Signal* signal) { - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - SchemaFile * schemaFile = (SchemaFile *)pagePtr.p; - initSchemaFile(schemaFile, 4 * ZSIZE_OF_PAGES_IN_WORDS); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + xsf->noOfPages = (c_tableRecordPool.getSize() + NDB_SF_PAGE_ENTRIES - 1) + / NDB_SF_PAGE_ENTRIES; + initSchemaFile(xsf, 0, xsf->noOfPages, true); + // init alt copy too for INR + XSchemaFile * oldxsf = &c_schemaFile[c_schemaRecord.oldSchemaPage != 0]; + oldxsf->noOfPages = xsf->noOfPages; + memcpy(&oldxsf->schemaPage[0], &xsf->schemaPage[0], xsf->schemaPage[0].FileSize); if (c_initialStart || c_initialNodeRestart) { jam(); ndbrequire(c_writeSchemaRecord.inUse == false); c_writeSchemaRecord.inUse = true; c_writeSchemaRecord.pageId = c_schemaRecord.schemaPage; + c_writeSchemaRecord.newFile = true; + c_writeSchemaRecord.firstPage = 0; + c_writeSchemaRecord.noOfPages = xsf->noOfPages; c_writeSchemaRecord.m_callback.m_callbackFunction = safe_cast(&Dbdict::initSchemaFile_conf); @@ -1786,7 +1925,9 @@ void Dbdict::initSchemaFile(Signal* signal) jam(); ndbrequire(c_readSchemaRecord.schemaReadState == ReadSchemaRecord::IDLE); c_readSchemaRecord.pageId = c_schemaRecord.oldSchemaPage; - c_readSchemaRecord.schemaReadState = ReadSchemaRecord::INITIAL_READ; + c_readSchemaRecord.firstPage = 0; + c_readSchemaRecord.noOfPages = 1; + c_readSchemaRecord.schemaReadState = ReadSchemaRecord::INITIAL_READ_HEAD; startReadSchemaFile(signal); } else { ndbrequire(false); @@ -1925,7 +2066,7 @@ void Dbdict::execDICTSTARTREQ(Signal* signal) 
safe_cast(&Dbdict::masterRestart_checkSchemaStatusComplete); c_restartRecord.activeTable = 0; - c_schemaRecord.schemaPage = c_schemaRecord.oldSchemaPage; + c_schemaRecord.schemaPage = c_schemaRecord.oldSchemaPage; // ugly checkSchemaStatus(signal); }//execDICTSTARTREQ() @@ -1934,15 +2075,13 @@ Dbdict::masterRestart_checkSchemaStatusComplete(Signal* signal, Uint32 callbackData, Uint32 returnCode){ - c_schemaRecord.schemaPage = ZMAX_PAGES_OF_TABLE_DEFINITION; + c_schemaRecord.schemaPage = 0; // ugly + XSchemaFile * oldxsf = &c_schemaFile[c_schemaRecord.oldSchemaPage != 0]; + ndbrequire(oldxsf->noOfPages != 0); LinearSectionPtr ptr[3]; - - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.oldSchemaPage); - - ptr[0].p = &pagePtr.p->word[0]; - ptr[0].sz = ZSIZE_OF_PAGES_IN_WORDS; + ptr[0].p = (Uint32*)&oldxsf->schemaPage[0]; + ptr[0].sz = oldxsf->noOfPages * NDB_SF_PAGE_SIZE_IN_WORDS; c_sendSchemaRecord.m_SCHEMAINFO_Counter = c_aliveNodes; NodeReceiverGroup rg(DBDICT, c_aliveNodes); @@ -1958,10 +2097,10 @@ Dbdict::masterRestart_checkSchemaStatusComplete(Signal* signal, 1, c); - PageRecordPtr newPagePtr; - c_pageRecordArray.getPtr(newPagePtr, c_schemaRecord.schemaPage); - memcpy(&newPagePtr.p->word[0], &pagePtr.p->word[0], - 4 * ZSIZE_OF_PAGES_IN_WORDS); + XSchemaFile * newxsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + newxsf->noOfPages = oldxsf->noOfPages; + memcpy(&newxsf->schemaPage[0], &oldxsf->schemaPage[0], + oldxsf->noOfPages * NDB_SF_PAGE_SIZE); signal->theData[0] = getOwnNodeId(); sendSignal(reference(), GSN_SCHEMA_INFOCONF, signal, 1, JBB); @@ -1978,11 +2117,11 @@ Dbdict::execGET_SCHEMA_INFOREQ(Signal* signal){ LinearSectionPtr ptr[3]; - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + ndbrequire(xsf->noOfPages != 0); - ptr[0].p = &pagePtr.p->word[0]; - ptr[0].sz = ZSIZE_OF_PAGES_IN_WORDS; + ptr[0].p = 
(Uint32*)&xsf->schemaPage[0]; + ptr[0].sz = xsf->noOfPages * NDB_SF_PAGE_SIZE_IN_WORDS; Callback c = { safe_cast(&Dbdict::sendSchemaComplete), 0 }; sendFragmentedSignal(ref, @@ -2024,12 +2163,22 @@ void Dbdict::execSCHEMA_INFO(Signal* signal) SegmentedSectionPtr schemaDataPtr; signal->getSection(schemaDataPtr, 0); - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - copy(&pagePtr.p->word[0], schemaDataPtr); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + ndbrequire(schemaDataPtr.sz % NDB_SF_PAGE_SIZE_IN_WORDS == 0); + xsf->noOfPages = schemaDataPtr.sz / NDB_SF_PAGE_SIZE_IN_WORDS; + copy((Uint32*)&xsf->schemaPage[0], schemaDataPtr); releaseSections(signal); + + SchemaFile * sf0 = &xsf->schemaPage[0]; + if (sf0->NdbVersion < NDB_SF_VERSION_5_0_6) { + bool ok = convertSchemaFileTo_5_0_6(xsf); + ndbrequire(ok); + } - validateChecksum((SchemaFile*)pagePtr.p); + validateChecksum(xsf); + + XSchemaFile * oldxsf = &c_schemaFile[c_schemaRecord.oldSchemaPage != 0]; + resizeSchemaFile(xsf, oldxsf->noOfPages); ndbrequire(signal->getSendersBlockRef() != reference()); @@ -2054,7 +2203,11 @@ Dbdict::restart_checkSchemaStatusComplete(Signal * signal, ndbrequire(c_writeSchemaRecord.inUse == false); c_writeSchemaRecord.inUse = true; + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; c_writeSchemaRecord.pageId = c_schemaRecord.schemaPage; + c_writeSchemaRecord.newFile = true; + c_writeSchemaRecord.firstPage = 0; + c_writeSchemaRecord.noOfPages = xsf->noOfPages; c_writeSchemaRecord.m_callback.m_callbackData = 0; c_writeSchemaRecord.m_callback.m_callbackFunction = safe_cast(&Dbdict::restart_writeSchemaConf); @@ -2103,20 +2256,18 @@ void Dbdict::execSCHEMA_INFOCONF(Signal* signal) void Dbdict::checkSchemaStatus(Signal* signal) { - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - - PageRecordPtr oldPagePtr; - c_pageRecordArray.getPtr(oldPagePtr, 
c_schemaRecord.oldSchemaPage); + XSchemaFile * newxsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + XSchemaFile * oldxsf = &c_schemaFile[c_schemaRecord.oldSchemaPage != 0]; + ndbrequire(newxsf->noOfPages == oldxsf->noOfPages); + const Uint32 noOfEntries = newxsf->noOfPages * NDB_SF_PAGE_ENTRIES; - for (; c_restartRecord.activeTable < MAX_TABLES; + for (; c_restartRecord.activeTable < noOfEntries; c_restartRecord.activeTable++) { jam(); Uint32 tableId = c_restartRecord.activeTable; - SchemaFile::TableEntry *newEntry = getTableEntry(pagePtr.p, tableId); - SchemaFile::TableEntry *oldEntry = getTableEntry(oldPagePtr.p, tableId, - true); + SchemaFile::TableEntry *newEntry = getTableEntry(newxsf, tableId); + SchemaFile::TableEntry *oldEntry = getTableEntry(oldxsf, tableId); SchemaFile::TableState schemaState = (SchemaFile::TableState)newEntry->m_tableState; SchemaFile::TableState oldSchemaState = @@ -2247,7 +2398,7 @@ void Dbdict::checkSchemaStatus(Signal* signal) return; }//if } - ndbrequire(ok); + ndbrequire(ok); break; } case SchemaFile::DROP_TABLE_STARTED: @@ -2350,7 +2501,8 @@ Dbdict::restartCreateTab(Signal* signal, Uint32 tableId, if(file && !ERROR_INSERTED(6002)){ jam(); - c_readTableRecord.noOfPages = te->m_noOfPages; + c_readTableRecord.noOfPages = + DIV(te->m_info_words + ZPAGE_HEADER_SIZE, ZSIZE_OF_PAGES_IN_WORDS); c_readTableRecord.pageId = 0; c_readTableRecord.m_callback.m_callbackData = createTabPtr.p->key; c_readTableRecord.m_callback.m_callbackFunction = @@ -2406,7 +2558,7 @@ Dbdict::restartCreateTab_readTableConf(Signal* signal, c_readTableRecord.tableId, parseRecord.errorCode); progError(__LINE__, - ERR_INVALID_CONFIG, + NDBD_EXIT_INVALID_CONFIG, buf); ndbrequire(parseRecord.errorCode == 0); } @@ -2643,6 +2795,10 @@ void Dbdict::execNODE_FAILREP(Signal* signal) c_blockState = BS_NODE_FAILURE; ok = true; break; + case BS_NODE_RESTART: + jam(); + ok = true; + break; } ndbrequire(ok); @@ -2665,6 +2821,15 @@ void Dbdict::execNODE_FAILREP(Signal* 
signal) }//if }//for + /* + * NODE_FAILREP guarantees that no "in flight" signal from + * a dead node is accepted, and also that the job buffer contains + * no such (un-executed) signals. Therefore no DICT_UNLOCK_ORD + * from a dead node (leading to master crash) is possible after + * this clean-up removes the lock record. + */ + removeStaleDictLocks(signal, theFailedNodes); + }//execNODE_FAILREP() @@ -2733,6 +2898,12 @@ Dbdict::execCREATE_TABLE_REQ(Signal* signal){ break; } + if (c_blockState == BS_NODE_RESTART){ + jam(); + parseRecord.errorCode = CreateTableRef::BusyWithNR; + break; + } + if (c_blockState != BS_IDLE){ jam(); parseRecord.errorCode = CreateTableRef::Busy; @@ -2882,6 +3053,12 @@ Dbdict::execALTER_TABLE_REQ(Signal* signal) return; } + if(c_blockState == BS_NODE_RESTART){ + jam(); + alterTableRef(signal, req, AlterTableRef::BusyWithNR); + return; + } + if(c_blockState != BS_IDLE){ jam(); alterTableRef(signal, req, AlterTableRef::Busy); @@ -3256,8 +3433,8 @@ Dbdict::execALTER_TAB_REQ(Signal * signal) tabEntry.m_tableType = tablePtr.p->tableType; tabEntry.m_tableState = SchemaFile::ALTER_TABLE_COMMITTED; tabEntry.m_gcp = gci; - tabEntry.m_noOfPages = - DIV(tabInfoPtr.sz + ZPAGE_HEADER_SIZE, ZSIZE_OF_PAGES_IN_WORDS); + tabEntry.m_info_words = tabInfoPtr.sz; + memset(tabEntry.m_unused, 0, sizeof(tabEntry.m_unused)); Callback callback; callback.m_callbackData = senderData; @@ -3788,9 +3965,8 @@ Dbdict::execCREATE_FRAGMENTATION_CONF(Signal* signal){ /** * Update table version */ - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - SchemaFile::TableEntry * tabEntry = getTableEntry(pagePtr.p, tabPtr.i); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + SchemaFile::TableEntry * tabEntry = getTableEntry(xsf, tabPtr.i); tabPtr.p->tableVersion = create_table_inc_schema_version(tabEntry->m_tableVersion); @@ -4096,8 +4272,8 @@ Dbdict::createTab_prepare(Signal* signal, CreateTabReq * req){ 
tabEntry.m_tableType = tabPtr.p->tableType; tabEntry.m_tableState = SchemaFile::ADD_STARTED; tabEntry.m_gcp = gci; - tabEntry.m_noOfPages = - DIV(tabInfoPtr.sz + ZPAGE_HEADER_SIZE, ZSIZE_OF_PAGES_IN_WORDS); + tabEntry.m_info_words = tabInfoPtr.sz; + memset(tabEntry.m_unused, 0, sizeof(tabEntry.m_unused)); Callback callback; callback.m_callbackData = createTabPtr.p->key; @@ -4181,6 +4357,44 @@ Dbdict::createTab_dih(Signal* signal, sendSignal(DBDIH_REF, GSN_DIADDTABREQ, signal, DiAddTabReq::SignalLength, JBB); + + /** + * Create KeyDescriptor + */ + KeyDescriptor* desc= g_key_descriptor_pool.getPtr(tabPtr.i); + new (desc) KeyDescriptor(); + + Uint32 key = 0; + Uint32 tAttr = tabPtr.p->firstAttribute; + while (tAttr != RNIL) + { + jam(); + AttributeRecord* aRec = c_attributeRecordPool.getPtr(tAttr); + if (aRec->tupleKey) + { + desc->noOfKeyAttr ++; + desc->keyAttr[key].attributeDescriptor = aRec->attributeDescriptor; + + Uint32 csNumber = (aRec->extPrecision >> 16); + if(csNumber) + { + desc->keyAttr[key].charsetInfo = all_charsets[csNumber]; + ndbrequire(all_charsets[csNumber]); + desc->hasCharAttr = 1; + } + else + { + desc->keyAttr[key].charsetInfo = 0; + } + if(AttributeDescriptor::getDKey(aRec->attributeDescriptor)) + { + desc->noOfDistrKeys ++; + } + key++; + } + tAttr = aRec->nextAttrInTable; + } + ndbrequire(key == tabPtr.p->noOfPrimkey); } static @@ -4249,6 +4463,13 @@ Dbdict::execADD_FRAGREQ(Signal* signal) { Uint32 lhPageBits = 0; ::calcLHbits(&lhPageBits, &lhDistrBits, fragId, fragCount); + Uint64 maxRows = tabPtr.p->maxRowsLow + + (((Uint64)tabPtr.p->maxRowsHigh) << 32); + Uint64 minRows = tabPtr.p->minRowsLow + + (((Uint64)tabPtr.p->minRowsHigh) << 32); + maxRows = (maxRows + fragCount - 1) / fragCount; + minRows = (minRows + fragCount - 1) / fragCount; + { LqhFragReq* req = (LqhFragReq*)signal->getDataPtrSend(); req->senderData = senderData; @@ -4260,20 +4481,22 @@ Dbdict::execADD_FRAGREQ(Signal* signal) { req->maxLoadFactor = tabPtr.p->maxLoadFactor; 
req->minLoadFactor = tabPtr.p->minLoadFactor; req->kValue = tabPtr.p->kValue; - req->lh3DistrBits = lhDistrBits; - req->lh3PageBits = lhPageBits; + req->lh3DistrBits = 0; //lhDistrBits; + req->lh3PageBits = 0; //lhPageBits; req->noOfAttributes = tabPtr.p->noOfAttributes; - req->noOfNullAttributes = tabPtr.p->noOfNullAttr; - req->noOfPagesToPreAllocate = 0; + req->noOfNullAttributes = tabPtr.p->noOfNullBits; + req->maxRowsLow = maxRows & 0xFFFFFFFF; + req->maxRowsHigh = maxRows >> 32; + req->minRowsLow = minRows & 0xFFFFFFFF; + req->minRowsHigh = minRows >> 32; req->schemaVersion = tabPtr.p->tableVersion; Uint32 keyLen = tabPtr.p->tupKeyLength; - req->keyLength = keyLen > 8 ? 0 : keyLen; // Put this into ACC instead + req->keyLength = keyLen; // wl-2066 no more "long keys" req->nextLCP = lcpNo; req->noOfKeyAttr = tabPtr.p->noOfPrimkey; req->noOfNewAttr = 0; - // noOfCharsets passed to TUP in upper half - req->noOfNewAttr |= (tabPtr.p->noOfCharsets << 16); + req->noOfCharsets = tabPtr.p->noOfCharsets; req->checksumIndicator = 1; req->noOfAttributeGroups = 1; req->GCPIndicator = 0; @@ -4334,7 +4557,7 @@ Dbdict::sendLQHADDATTRREQ(Signal* signal, LqhAddAttrReq::Entry& entry = req->attributes[i]; entry.attrId = attrPtr.p->attributeId; entry.attrDescriptor = attrPtr.p->attributeDescriptor; - entry.extTypeInfo = attrPtr.p->extType; + entry.extTypeInfo = 0; // charset number passed to TUP, TUX in upper half entry.extTypeInfo |= (attrPtr.p->extPrecision & ~0xFFFF); if (tabPtr.p->isIndex()) { @@ -4476,10 +4699,12 @@ Dbdict::execTAB_COMMITCONF(Signal* signal){ signal->theData[3] = reference(); signal->theData[4] = (Uint32)tabPtr.p->tableType; signal->theData[5] = createTabPtr.p->key; - sendSignal(DBTC_REF, GSN_TC_SCHVERREQ, signal, 6, JBB); + signal->theData[6] = (Uint32)tabPtr.p->noOfPrimkey; + + sendSignal(DBTC_REF, GSN_TC_SCHVERREQ, signal, 7, JBB); return; } - + ndbrequire(false); } @@ -4532,8 +4757,8 @@ Dbdict::createTab_commit(Signal * signal, CreateTabReq * req){ 
tabEntry.m_tableType = tabPtr.p->tableType; tabEntry.m_tableState = SchemaFile::TABLE_ADD_COMMITTED; tabEntry.m_gcp = tabPtr.p->gciTableCreated; - tabEntry.m_noOfPages = - DIV(tabPtr.p->packedSize + ZPAGE_HEADER_SIZE, ZSIZE_OF_PAGES_IN_WORDS); + tabEntry.m_info_words = tabPtr.p->packedSize; + memset(tabEntry.m_unused, 0, sizeof(tabEntry.m_unused)); Callback callback; callback.m_callbackData = createTabPtr.p->key; @@ -4634,10 +4859,9 @@ Dbdict::createTab_dropComplete(Signal* signal, c_tableRecordPool.getPtr(tabPtr, createTabPtr.p->m_tablePtrI); releaseTableObject(tabPtr.i); - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - SchemaFile::TableEntry * tableEntry = getTableEntry(pagePtr.p, tabPtr.i); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + SchemaFile::TableEntry * tableEntry = getTableEntry(xsf, tabPtr.i); tableEntry->m_tableState = SchemaFile::DROP_TABLE_COMMITTED; //@todo check error @@ -4835,10 +5059,18 @@ void Dbdict::handleTabInfoInit(SimpleProperties::Reader & it, tablePtr.p->minLoadFactor = tableDesc.MinLoadFactor; tablePtr.p->maxLoadFactor = tableDesc.MaxLoadFactor; tablePtr.p->fragmentType = (DictTabInfo::FragmentType)tableDesc.FragmentType; - tablePtr.p->fragmentKeyType = (DictTabInfo::FragmentKeyType)tableDesc.FragmentKeyType; tablePtr.p->tableType = (DictTabInfo::TableType)tableDesc.TableType; tablePtr.p->kValue = tableDesc.TableKValue; tablePtr.p->fragmentCount = tableDesc.FragmentCount; + tablePtr.p->maxRowsLow = tableDesc.MaxRowsLow; + tablePtr.p->maxRowsHigh = tableDesc.MaxRowsHigh; + tablePtr.p->minRowsLow = tableDesc.MinRowsLow; + tablePtr.p->minRowsHigh = tableDesc.MinRowsHigh; + + Uint64 maxRows = + (((Uint64)tablePtr.p->maxRowsHigh) << 32) + tablePtr.p->maxRowsLow; + Uint64 minRows = + (((Uint64)tablePtr.p->minRowsHigh) << 32) + tablePtr.p->minRowsLow; tablePtr.p->frmLen = tableDesc.FrmLen; memcpy(tablePtr.p->frmData, tableDesc.FrmData, tableDesc.FrmLen); @@ -4884,6 +5116,7 @@ 
void Dbdict::handleTabInfo(SimpleProperties::Reader & it, Uint32 keyLength = 0; Uint32 attrCount = tablePtr.p->noOfAttributes; Uint32 nullCount = 0; + Uint32 nullBits = 0; Uint32 noOfCharsets = 0; Uint16 charsets[128]; Uint32 recordLength = 0; @@ -4936,19 +5169,24 @@ void Dbdict::handleTabInfo(SimpleProperties::Reader & it, attrPtr.p->attributeId = attrDesc.AttributeId; attrPtr.p->tupleKey = (keyCount + 1) * attrDesc.AttributeKeyFlag; - attrPtr.p->extType = attrDesc.AttributeExtType; attrPtr.p->extPrecision = attrDesc.AttributeExtPrecision; attrPtr.p->extScale = attrDesc.AttributeExtScale; attrPtr.p->extLength = attrDesc.AttributeExtLength; // charset in upper half of precision unsigned csNumber = (attrPtr.p->extPrecision >> 16); if (csNumber != 0) { + /* + * A new charset is first accessed here on this node. + * TODO use separate thread (e.g. via NDBFS) if need to load from file + */ CHARSET_INFO* cs = get_charset(csNumber, MYF(0)); if (cs == NULL) { parseP->errorCode = CreateTableRef::InvalidCharset; parseP->errorLine = __LINE__; return; } + // XXX should be done somewhere in mysql + all_charsets[cs->number] = cs; unsigned i = 0; while (i < noOfCharsets) { if (charsets[i] == csNumber) @@ -4966,9 +5204,7 @@ void Dbdict::handleTabInfo(SimpleProperties::Reader & it, } } - /** - * Ignore incoming old-style type and recompute it. 
- */ + // compute attribute size and array size bool translateOk = attrDesc.translateExtType(); tabRequire(translateOk, CreateTableRef::Inconsistency); @@ -4981,15 +5217,12 @@ void Dbdict::handleTabInfo(SimpleProperties::Reader & it, } Uint32 desc = 0; - AttributeDescriptor::setType(desc, attrDesc.AttributeType); + AttributeDescriptor::setType(desc, attrDesc.AttributeExtType); AttributeDescriptor::setSize(desc, attrDesc.AttributeSize); AttributeDescriptor::setArray(desc, attrDesc.AttributeArraySize); AttributeDescriptor::setNullable(desc, attrDesc.AttributeNullableFlag); - AttributeDescriptor::setDGroup(desc, attrDesc.AttributeDGroup); AttributeDescriptor::setDKey(desc, attrDesc.AttributeDKey); AttributeDescriptor::setPrimaryKey(desc, attrDesc.AttributeKeyFlag); - - AttributeDescriptor::setStoredInTup(desc, attrDesc.AttributeStoredInd); attrPtr.p->attributeDescriptor = desc; attrPtr.p->autoIncrement = attrDesc.AttributeAutoIncrement; strcpy(attrPtr.p->defaultValue, attrDesc.AttributeDefaultValue); @@ -5001,7 +5234,25 @@ void Dbdict::handleTabInfo(SimpleProperties::Reader & it, nullCount += attrDesc.AttributeNullableFlag; const Uint32 aSz = (1 << attrDesc.AttributeSize); - const Uint32 sz = ((aSz * attrDesc.AttributeArraySize) + 31) >> 5; + Uint32 sz; + if(aSz != 1) + { + sz = ((aSz * attrDesc.AttributeArraySize) + 31) >> 5; + } + else + { + sz = 0; + nullBits += attrDesc.AttributeArraySize; + } + + if(attrDesc.AttributeArraySize == 0) + { + parseP->errorCode = CreateTableRef::InvalidArraySize; + parseP->status = status; + parseP->errorKey = it.getKey(); + parseP->errorLine = __LINE__; + return; + } recordLength += sz; if(attrDesc.AttributeKeyFlag){ @@ -5030,6 +5281,7 @@ void Dbdict::handleTabInfo(SimpleProperties::Reader & it, tablePtr.p->noOfNullAttr = nullCount; tablePtr.p->noOfCharsets = noOfCharsets; tablePtr.p->tupKeyLength = keyLength; + tablePtr.p->noOfNullBits = nullCount + nullBits; tabRequire(recordLength<= MAX_TUPLE_SIZE_IN_WORDS, 
CreateTableRef::RecordTooBig); @@ -5103,7 +5355,10 @@ void Dbdict::execWAIT_GCP_REF(Signal* signal) /* ---------------------------------------------------------------- */ // Error Handling code needed /* ---------------------------------------------------------------- */ - progError(ref->errorCode, 0); + char buf[32]; + BaseString::snprintf(buf, sizeof(buf), "WAIT_GCP_REF ErrorCode=%d", + ref->errorCode); + progError(__LINE__, NDBD_EXIT_NDBREQUIRE, buf); }//execWAIT_GCP_REF() @@ -5134,6 +5389,12 @@ Dbdict::execDROP_TABLE_REQ(Signal* signal){ return; } + if(c_blockState == BS_NODE_RESTART){ + jam(); + dropTableRef(signal, req, DropTableRef::BusyWithNR); + return; + } + if(c_blockState != BS_IDLE){ jam(); dropTableRef(signal, req, DropTableRef::Busy); @@ -5492,21 +5753,22 @@ Dbdict::execPREP_DROP_TAB_REQ(Signal* signal){ /** * Modify schema */ - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - - SchemaFile::TableEntry * tableEntry = getTableEntry(pagePtr.p, tablePtr.i); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + SchemaFile::TableEntry * tableEntry = getTableEntry(xsf, tablePtr.i); SchemaFile::TableState tabState = (SchemaFile::TableState)tableEntry->m_tableState; ndbrequire(tabState == SchemaFile::TABLE_ADD_COMMITTED || tabState == SchemaFile::ALTER_TABLE_COMMITTED); tableEntry->m_tableState = SchemaFile::DROP_TABLE_STARTED; - computeChecksum((SchemaFile*)pagePtr.p); + computeChecksum(xsf, tablePtr.i / NDB_SF_PAGE_ENTRIES); ndbrequire(c_writeSchemaRecord.inUse == false); c_writeSchemaRecord.inUse = true; c_writeSchemaRecord.pageId = c_schemaRecord.schemaPage; + c_writeSchemaRecord.newFile = false; + c_writeSchemaRecord.firstPage = tablePtr.i / NDB_SF_PAGE_ENTRIES; + c_writeSchemaRecord.noOfPages = 1; c_writeSchemaRecord.m_callback.m_callbackData = dropTabPtr.p->key; c_writeSchemaRecord.m_callback.m_callbackFunction = safe_cast(&Dbdict::prepDropTab_writeSchemaConf); @@ -5667,20 +5929,20 @@ 
Dbdict::dropTab_complete(Signal* signal, /** * Write to schema file */ - PageRecordPtr pagePtr; - c_pageRecordArray.getPtr(pagePtr, c_schemaRecord.schemaPage); - - SchemaFile::TableEntry * tableEntry = getTableEntry(pagePtr.p, tableId); + XSchemaFile * xsf = &c_schemaFile[c_schemaRecord.schemaPage != 0]; + SchemaFile::TableEntry * tableEntry = getTableEntry(xsf, tableId); SchemaFile::TableState tabState = (SchemaFile::TableState)tableEntry->m_tableState; ndbrequire(tabState == SchemaFile::DROP_TABLE_STARTED); tableEntry->m_tableState = SchemaFile::DROP_TABLE_COMMITTED; - computeChecksum((SchemaFile*)pagePtr.p); + computeChecksum(xsf, tableId / NDB_SF_PAGE_ENTRIES); ndbrequire(c_writeSchemaRecord.inUse == false); c_writeSchemaRecord.inUse = true; c_writeSchemaRecord.pageId = c_schemaRecord.schemaPage; + c_writeSchemaRecord.firstPage = tableId / NDB_SF_PAGE_ENTRIES; + c_writeSchemaRecord.noOfPages = 1; c_writeSchemaRecord.m_callback.m_callbackData = dropTabPtr.p->key; c_writeSchemaRecord.m_callback.m_callbackFunction = safe_cast(&Dbdict::dropTab_writeSchemaConf); @@ -5864,7 +6126,10 @@ void Dbdict::sendGET_TABLEID_REF(Signal* signal, void Dbdict::execGET_TABINFOREQ(Signal* signal) { jamEntry(); - if(!assembleFragments(signal)) { return; } + if(!assembleFragments(signal)) + { + return; + } GetTabInfoReq * const req = (GetTabInfoReq *)&signal->theData[0]; @@ -6449,11 +6714,15 @@ void Dbdict::createIndex_slavePrepare(Signal* signal, OpCreateIndexPtr opPtr) { jam(); + if (ERROR_INSERTED(6006) && ! 
opPtr.p->m_isMaster) { + ndbrequire(false); + } } void Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) { + Uint32 attrid_map[MAX_ATTRIBUTES_IN_INDEX]; Uint32 k; jam(); const CreateIndxReq* const req = &opPtr.p->m_request; @@ -6523,39 +6792,49 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) // tree node size in words (make configurable later) indexPtr.p->tupKeyLength = MAX_TTREE_NODE_SIZE; } - // hash index attributes must currently be in table order - Uint32 prevAttrId = RNIL; + + AttributeMask mask; + mask.clear(); for (k = 0; k < opPtr.p->m_attrList.sz; k++) { jam(); - bool found = false; - for (Uint32 tAttr = tablePtr.p->firstAttribute; tAttr != RNIL; ) { - AttributeRecord* aRec = c_attributeRecordPool.getPtr(tAttr); - tAttr = aRec->nextAttrInTable; - if (aRec->attributeId != opPtr.p->m_attrList.id[k]) + unsigned current_id= opPtr.p->m_attrList.id[k]; + AttributeRecord* aRec= NULL; + Uint32 tAttr= tablePtr.p->firstAttribute; + for (; tAttr != RNIL; tAttr= aRec->nextAttrInTable) + { + aRec = c_attributeRecordPool.getPtr(tAttr); + if (aRec->attributeId != current_id) continue; jam(); - found = true; - const Uint32 a = aRec->attributeDescriptor; - if (indexPtr.p->isHashIndex()) { - const Uint32 s1 = AttributeDescriptor::getSize(a); - const Uint32 s2 = AttributeDescriptor::getArraySize(a); - indexPtr.p->tupKeyLength += ((1 << s1) * s2 + 31) >> 5; - } + break; } - if (! 
found) { + if (tAttr == RNIL) { jam(); opPtr.p->m_errorCode = CreateIndxRef::BadRequestType; opPtr.p->m_errorLine = __LINE__; return; } - if (indexPtr.p->isHashIndex() && - k > 0 && prevAttrId >= opPtr.p->m_attrList.id[k]) { + if (mask.get(current_id)) + { jam(); - opPtr.p->m_errorCode = CreateIndxRef::InvalidAttributeOrder; + opPtr.p->m_errorCode = CreateIndxRef::DuplicateAttributes; opPtr.p->m_errorLine = __LINE__; return; } - prevAttrId = opPtr.p->m_attrList.id[k]; + mask.set(current_id); + + const Uint32 a = aRec->attributeDescriptor; + unsigned kk= k; + if (indexPtr.p->isHashIndex()) { + const Uint32 s1 = AttributeDescriptor::getSize(a); + const Uint32 s2 = AttributeDescriptor::getArraySize(a); + indexPtr.p->tupKeyLength += ((1 << s1) * s2 + 31) >> 5; + // reorder the attributes according to the tableid order + // for unque indexes + for (; kk > 0 && current_id < attrid_map[kk-1]>>16; kk--) + attrid_map[kk]= attrid_map[kk-1]; + } + attrid_map[kk]= k | (current_id << 16); } indexPtr.p->noOfPrimkey = indexPtr.p->noOfAttributes; // plus concatenated primary table key attribute @@ -6576,15 +6855,21 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) AttributeRecordPtr aRecPtr; c_attributeRecordPool.getPtr(aRecPtr, tablePtr.p->firstAttribute); for (k = 0; k < opPtr.p->m_attrList.sz; k++) { + // insert the attributes in the order decided above in attrid_map + // k is new order, current_id is in previous order + // ToDo: make sure "current_id" is stored with the table and + // passed up to NdbDictionary + unsigned current_id= opPtr.p->m_attrList.id[attrid_map[k] & 0xffff]; jam(); for (Uint32 tAttr = tablePtr.p->firstAttribute; tAttr != RNIL; ) { AttributeRecord* aRec = c_attributeRecordPool.getPtr(tAttr); tAttr = aRec->nextAttrInTable; - if (aRec->attributeId != opPtr.p->m_attrList.id[k]) + if (aRec->attributeId != current_id) continue; jam(); const Uint32 a = aRec->attributeDescriptor; bool isNullable = AttributeDescriptor::getNullable(a); + 
Uint32 attrType = AttributeDescriptor::getType(a); w.add(DictTabInfo::AttributeName, aRec->attributeName); w.add(DictTabInfo::AttributeId, k); if (indexPtr.p->isHashIndex()) { @@ -6595,9 +6880,7 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) w.add(DictTabInfo::AttributeKeyFlag, (Uint32)false); w.add(DictTabInfo::AttributeNullableFlag, (Uint32)isNullable); } - w.add(DictTabInfo::AttributeStoredInd, (Uint32)DictTabInfo::Stored); - // ext type overrides - w.add(DictTabInfo::AttributeExtType, aRec->extType); + w.add(DictTabInfo::AttributeExtType, attrType); w.add(DictTabInfo::AttributeExtPrecision, aRec->extPrecision); w.add(DictTabInfo::AttributeExtScale, aRec->extScale); w.add(DictTabInfo::AttributeExtLength, aRec->extLength); @@ -6610,9 +6893,7 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) w.add(DictTabInfo::AttributeName, "NDB$PK"); w.add(DictTabInfo::AttributeId, opPtr.p->m_attrList.sz); w.add(DictTabInfo::AttributeKeyFlag, (Uint32)false); - w.add(DictTabInfo::AttributeStoredInd, (Uint32)DictTabInfo::Stored); w.add(DictTabInfo::AttributeNullableFlag, (Uint32)false); - // ext type overrides w.add(DictTabInfo::AttributeExtType, (Uint32)DictTabInfo::ExtUnsigned); w.add(DictTabInfo::AttributeExtLength, tablePtr.p->tupKeyLength); w.add(DictTabInfo::AttributeEnd, (Uint32)true); @@ -6623,9 +6904,7 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) w.add(DictTabInfo::AttributeName, "NDB$TNODE"); w.add(DictTabInfo::AttributeId, opPtr.p->m_attrList.sz); w.add(DictTabInfo::AttributeKeyFlag, (Uint32)true); - w.add(DictTabInfo::AttributeStoredInd, (Uint32)DictTabInfo::Stored); w.add(DictTabInfo::AttributeNullableFlag, (Uint32)false); - // ext type overrides w.add(DictTabInfo::AttributeExtType, (Uint32)DictTabInfo::ExtUnsigned); w.add(DictTabInfo::AttributeExtLength, indexPtr.p->tupKeyLength); w.add(DictTabInfo::AttributeEnd, (Uint32)true); @@ -7124,2171 +7403,6 @@ 
Dbdict::dropIndex_sendReply(Signal* signal, OpDropIndexPtr opPtr, sendSignal(rep->getUserRef(), gsn, signal, length, JBB); } -/***************************************************** - * - * Util signalling - * - *****************************************************/ - -int -Dbdict::sendSignalUtilReq(Callback *pcallback, - BlockReference ref, - GlobalSignalNumber gsn, - Signal* signal, - Uint32 length, - JobBufferLevel jbuf, - LinearSectionPtr ptr[3], - Uint32 noOfSections) -{ - jam(); - EVENT_TRACE; - OpSignalUtilPtr utilRecPtr; - - // Seize a Util Send record - if (!c_opSignalUtil.seize(utilRecPtr)) { - // Failed to allocate util record - return -1; - } - utilRecPtr.p->m_callback = *pcallback; - - // should work for all util signal classes - UtilPrepareReq *req = (UtilPrepareReq*)signal->getDataPtrSend(); - utilRecPtr.p->m_userData = req->getSenderData(); - req->setSenderData(utilRecPtr.i); - - if (ptr) { - jam(); - sendSignal(ref, gsn, signal, length, jbuf, ptr, noOfSections); - } else { - jam(); - sendSignal(ref, gsn, signal, length, jbuf); - } - - return 0; -} - -int -Dbdict::recvSignalUtilReq(Signal* signal, Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - UtilPrepareConf * const req = (UtilPrepareConf*)signal->getDataPtr(); - OpSignalUtilPtr utilRecPtr; - utilRecPtr.i = req->getSenderData(); - if ((utilRecPtr.p = c_opSignalUtil.getPtr(utilRecPtr.i)) == NULL) { - jam(); - return -1; - } - - req->setSenderData(utilRecPtr.p->m_userData); - Callback c = utilRecPtr.p->m_callback; - c_opSignalUtil.release(utilRecPtr); - - execute(signal, c, returnCode); - return 0; -} - -void Dbdict::execUTIL_PREPARE_CONF(Signal *signal) -{ - jamEntry(); - EVENT_TRACE; - ndbrequire(recvSignalUtilReq(signal, 0) == 0); -} - -void -Dbdict::execUTIL_PREPARE_REF(Signal *signal) -{ - jamEntry(); - EVENT_TRACE; - ndbrequire(recvSignalUtilReq(signal, 1) == 0); -} - -void Dbdict::execUTIL_EXECUTE_CONF(Signal *signal) -{ - jamEntry(); - EVENT_TRACE; - ndbrequire(recvSignalUtilReq(signal, 0) == 
0); -} - -void Dbdict::execUTIL_EXECUTE_REF(Signal *signal) -{ - jamEntry(); - EVENT_TRACE; - -#ifdef EVENT_DEBUG - UtilExecuteRef * ref = (UtilExecuteRef *)signal->getDataPtrSend(); - - ndbout_c("execUTIL_EXECUTE_REF"); - ndbout_c("senderData %u",ref->getSenderData()); - ndbout_c("errorCode %u",ref->getErrorCode()); - ndbout_c("TCErrorCode %u",ref->getTCErrorCode()); -#endif - - ndbrequire(recvSignalUtilReq(signal, 1) == 0); -} -void Dbdict::execUTIL_RELEASE_CONF(Signal *signal) -{ - jamEntry(); - EVENT_TRACE; - ndbrequire(false); - ndbrequire(recvSignalUtilReq(signal, 0) == 0); -} -void Dbdict::execUTIL_RELEASE_REF(Signal *signal) -{ - jamEntry(); - EVENT_TRACE; - ndbrequire(false); - ndbrequire(recvSignalUtilReq(signal, 1) == 0); -} - -/** - * MODULE: Create event - * - * Create event in DICT. - * - * - * Request type in CREATE_EVNT signals: - * - * Signalflow see Dbdict.txt - * - */ - -/***************************************************************** - * - * Systable stuff - * - */ - -const Uint32 Dbdict::sysTab_NDBEVENTS_0_szs[EVENT_SYSTEM_TABLE_LENGTH] = { - sizeof(((sysTab_NDBEVENTS_0*)0)->NAME), - sizeof(((sysTab_NDBEVENTS_0*)0)->EVENT_TYPE), - sizeof(((sysTab_NDBEVENTS_0*)0)->TABLE_NAME), - sizeof(((sysTab_NDBEVENTS_0*)0)->ATTRIBUTE_MASK), - sizeof(((sysTab_NDBEVENTS_0*)0)->SUBID), - sizeof(((sysTab_NDBEVENTS_0*)0)->SUBKEY) -}; - -void -Dbdict::prepareTransactionEventSysTable (Callback *pcallback, - Signal* signal, - Uint32 senderData, - UtilPrepareReq::OperationTypeValue prepReq) -{ - // find table id for event system table - TableRecord keyRecord; - strcpy(keyRecord.tableName, EVENT_SYSTEM_TABLE_NAME); - - TableRecordPtr tablePtr; - c_tableRecordHash.find(tablePtr, keyRecord); - - ndbrequire(tablePtr.i != RNIL); // system table must exist - - Uint32 tableId = tablePtr.p->tableId; /* System table */ - Uint32 noAttr = tablePtr.p->noOfAttributes; - ndbrequire(noAttr == EVENT_SYSTEM_TABLE_LENGTH); - - switch (prepReq) { - case UtilPrepareReq::Update: - case 
UtilPrepareReq::Insert: - case UtilPrepareReq::Write: - case UtilPrepareReq::Read: - jam(); - break; - case UtilPrepareReq::Delete: - jam(); - noAttr = 1; // only involves Primary key which should be the first - break; - } - prepareUtilTransaction(pcallback, signal, senderData, tableId, NULL, - prepReq, noAttr, NULL, NULL); -} - -void -Dbdict::prepareUtilTransaction(Callback *pcallback, - Signal* signal, - Uint32 senderData, - Uint32 tableId, - const char* tableName, - UtilPrepareReq::OperationTypeValue prepReq, - Uint32 noAttr, - Uint32 attrIds[], - const char *attrNames[]) -{ - jam(); - EVENT_TRACE; - - UtilPrepareReq * utilPrepareReq = - (UtilPrepareReq *)signal->getDataPtrSend(); - - utilPrepareReq->setSenderRef(reference()); - utilPrepareReq->setSenderData(senderData); - - const Uint32 pageSizeInWords = 128; - Uint32 propPage[pageSizeInWords]; - LinearWriter w(&propPage[0],128); - w.first(); - w.add(UtilPrepareReq::NoOfOperations, 1); - w.add(UtilPrepareReq::OperationType, prepReq); - if (tableName) { - jam(); - w.add(UtilPrepareReq::TableName, tableName); - } else { - jam(); - w.add(UtilPrepareReq::TableId, tableId); - } - for(Uint32 i = 0; i < noAttr; i++) - if (tableName) { - jam(); - w.add(UtilPrepareReq::AttributeName, attrNames[i]); - } else { - if (attrIds) { - jam(); - w.add(UtilPrepareReq::AttributeId, attrIds[i]); - } else { - jam(); - w.add(UtilPrepareReq::AttributeId, i); - } - } -#ifdef EVENT_DEBUG - // Debugging - SimplePropertiesLinearReader reader(propPage, w.getWordsUsed()); - printf("Dict::prepareInsertTransactions: Sent SimpleProperties:\n"); - reader.printAll(ndbout); -#endif - - struct LinearSectionPtr sectionsPtr[UtilPrepareReq::NoOfSections]; - sectionsPtr[UtilPrepareReq::PROPERTIES_SECTION].p = propPage; - sectionsPtr[UtilPrepareReq::PROPERTIES_SECTION].sz = w.getWordsUsed(); - - sendSignalUtilReq(pcallback, DBUTIL_REF, GSN_UTIL_PREPARE_REQ, signal, - UtilPrepareReq::SignalLength, JBB, - sectionsPtr, UtilPrepareReq::NoOfSections); -} - 
-/***************************************************************** - * - * CREATE_EVNT_REQ has three types RT_CREATE, RT_GET (from user) - * and RT_DICT_AFTER_GET send from master DICT to slaves - * - * This function just dscpaches these to - * - * createEvent_RT_USER_CREATE - * createEvent_RT_USER_GET - * createEvent_RT_DICT_AFTER_GET - * - * repectively - * - */ - -void -Dbdict::execCREATE_EVNT_REQ(Signal* signal) -{ - jamEntry(); - -#if 0 - { - SafeCounterHandle handle; - { - SafeCounter tmp(c_counterMgr, handle); - tmp.init<CreateEvntRef>(CMVMI, GSN_DUMP_STATE_ORD, /* senderData */ 13); - tmp.clearWaitingFor(); - tmp.setWaitingFor(3); - ndbrequire(!tmp.done()); - ndbout_c("Allocted"); - } - ndbrequire(!handle.done()); - { - SafeCounter tmp(c_counterMgr, handle); - tmp.clearWaitingFor(3); - ndbrequire(tmp.done()); - ndbout_c("Deallocted"); - } - ndbrequire(handle.done()); - } - { - NodeBitmask nodes; - nodes.clear(); - - nodes.set(2); - nodes.set(3); - nodes.set(4); - nodes.set(5); - - { - Uint32 i = 0; - while((i = nodes.find(i)) != NodeBitmask::NotFound){ - ndbout_c("1 Node id = %u", i); - i++; - } - } - - NodeReceiverGroup rg(DBDICT, nodes); - RequestTracker rt2; - ndbrequire(rt2.done()); - ndbrequire(!rt2.hasRef()); - ndbrequire(!rt2.hasConf()); - rt2.init<CreateEvntRef>(c_counterMgr, rg, GSN_CREATE_EVNT_REF, 13); - - RequestTracker rt3; - rt3.init<CreateEvntRef>(c_counterMgr, rg, GSN_CREATE_EVNT_REF, 13); - - ndbrequire(!rt2.done()); - ndbrequire(!rt3.done()); - - rt2.reportRef(c_counterMgr, 2); - rt3.reportConf(c_counterMgr, 2); - - ndbrequire(!rt2.done()); - ndbrequire(!rt3.done()); - - rt2.reportConf(c_counterMgr, 3); - rt3.reportConf(c_counterMgr, 3); - - ndbrequire(!rt2.done()); - ndbrequire(!rt3.done()); - - rt2.reportConf(c_counterMgr, 4); - rt3.reportConf(c_counterMgr, 4); - - ndbrequire(!rt2.done()); - ndbrequire(!rt3.done()); - - rt2.reportConf(c_counterMgr, 5); - rt3.reportConf(c_counterMgr, 5); - - ndbrequire(rt2.done()); - 
ndbrequire(rt3.done()); - } -#endif - - if (! assembleFragments(signal)) { - jam(); - return; - } - - CreateEvntReq *req = (CreateEvntReq*)signal->getDataPtr(); - const CreateEvntReq::RequestType requestType = req->getRequestType(); - const Uint32 requestFlag = req->getRequestFlag(); - - OpCreateEventPtr evntRecPtr; - // Seize a Create Event record - if (!c_opCreateEvent.seize(evntRecPtr)) { - // Failed to allocate event record - jam(); - releaseSections(signal); - - CreateEvntRef * ret = (CreateEvntRef *)signal->getDataPtrSend(); - ret->senderRef = reference(); - ret->setErrorCode(CreateEvntRef::SeizeError); - ret->setErrorLine(__LINE__); - ret->setErrorNode(reference()); - sendSignal(signal->senderBlockRef(), GSN_CREATE_EVNT_REF, signal, - CreateEvntRef::SignalLength, JBB); - return; - } - -#ifdef EVENT_DEBUG - ndbout_c("DBDICT::execCREATE_EVNT_REQ from %u evntRecId = (%d)", refToNode(signal->getSendersBlockRef()), evntRecPtr.i); -#endif - - ndbrequire(req->getUserRef() == signal->getSendersBlockRef()); - - evntRecPtr.p->init(req,this); - - if (requestFlag & (Uint32)CreateEvntReq::RT_DICT_AFTER_GET) { - jam(); - EVENT_TRACE; - createEvent_RT_DICT_AFTER_GET(signal, evntRecPtr); - return; - } - if (requestType == CreateEvntReq::RT_USER_GET) { - jam(); - EVENT_TRACE; - createEvent_RT_USER_GET(signal, evntRecPtr); - return; - } - if (requestType == CreateEvntReq::RT_USER_CREATE) { - jam(); - EVENT_TRACE; - createEvent_RT_USER_CREATE(signal, evntRecPtr); - return; - } - -#ifdef EVENT_DEBUG - ndbout << "Dbdict.cpp: Dbdict::execCREATE_EVNT_REQ other" << endl; -#endif - jam(); - releaseSections(signal); - - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); -} - -/******************************************************************** - * - * Event creation - * - *****************************************************************/ - -void 
-Dbdict::createEvent_RT_USER_CREATE(Signal* signal, OpCreateEventPtr evntRecPtr){ - jam(); - evntRecPtr.p->m_request.setUserRef(signal->senderBlockRef()); - -#ifdef EVENT_DEBUG - ndbout << "Dbdict.cpp: Dbdict::execCREATE_EVNT_REQ RT_USER" << endl; - char buf[128] = {0}; - AttributeMask mask = evntRecPtr.p->m_request.getAttrListBitmask(); - mask.getText(buf); - ndbout_c("mask = %s", buf); -#endif - - // Interpret the long signal - - SegmentedSectionPtr ssPtr; - // save name and event properties - signal->getSection(ssPtr, CreateEvntReq::EVENT_NAME_SECTION); - - SimplePropertiesSectionReader r0(ssPtr, getSectionSegmentPool()); -#ifdef EVENT_DEBUG - r0.printAll(ndbout); -#endif - // event name - if ((!r0.first()) || - (r0.getValueType() != SimpleProperties::StringValue) || - (r0.getValueLen() <= 0)) { - jam(); - releaseSections(signal); - - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); - return; - } - r0.getString(evntRecPtr.p->m_eventRec.NAME); - { - int len = strlen(evntRecPtr.p->m_eventRec.NAME); - memset(evntRecPtr.p->m_eventRec.NAME+len, 0, MAX_TAB_NAME_SIZE-len); -#ifdef EVENT_DEBUG - printf("CreateEvntReq::RT_USER_CREATE; EventName %s, len %u\n", - evntRecPtr.p->m_eventRec.NAME, len); - for(int i = 0; i < MAX_TAB_NAME_SIZE/4; i++) - printf("H'%.8x ", ((Uint32*)evntRecPtr.p->m_eventRec.NAME)[i]); - printf("\n"); -#endif - } - // table name - if ((!r0.next()) || - (r0.getValueType() != SimpleProperties::StringValue) || - (r0.getValueLen() <= 0)) { - jam(); - releaseSections(signal); - - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); - return; - } - r0.getString(evntRecPtr.p->m_eventRec.TABLE_NAME); - { - int len = strlen(evntRecPtr.p->m_eventRec.TABLE_NAME); - 
memset(evntRecPtr.p->m_eventRec.TABLE_NAME+len, 0, MAX_TAB_NAME_SIZE-len); - } - -#ifdef EVENT_DEBUG - ndbout_c("event name: %s",evntRecPtr.p->m_eventRec.NAME); - ndbout_c("table name: %s",evntRecPtr.p->m_eventRec.TABLE_NAME); -#endif - - releaseSections(signal); - - // Send request to SUMA - - CreateSubscriptionIdReq * sumaIdReq = - (CreateSubscriptionIdReq *)signal->getDataPtrSend(); - - // make sure we save the original sender for later - sumaIdReq->senderData = evntRecPtr.i; -#ifdef EVENT_DEBUG - ndbout << "sumaIdReq->senderData = " << sumaIdReq->senderData << endl; -#endif - sendSignal(SUMA_REF, GSN_CREATE_SUBID_REQ, signal, - CreateSubscriptionIdReq::SignalLength, JBB); - // we should now return in either execCREATE_SUBID_CONF - // or execCREATE_SUBID_REF -} - -void Dbdict::execCREATE_SUBID_REF(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - CreateSubscriptionIdRef * const ref = - (CreateSubscriptionIdRef *)signal->getDataPtr(); - OpCreateEventPtr evntRecPtr; - - evntRecPtr.i = ref->senderData; - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); -} - -void Dbdict::execCREATE_SUBID_CONF(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - - CreateSubscriptionIdConf const * sumaIdConf = - (CreateSubscriptionIdConf *)signal->getDataPtr(); - - Uint32 evntRecId = sumaIdConf->senderData; - OpCreateEvent *evntRec; - - ndbrequire((evntRec = c_opCreateEvent.getPtr(evntRecId)) != NULL); - - evntRec->m_request.setEventId(sumaIdConf->subscriptionId); - evntRec->m_request.setEventKey(sumaIdConf->subscriptionKey); - - releaseSections(signal); - - Callback c = { safe_cast(&Dbdict::createEventUTIL_PREPARE), 0 }; - - prepareTransactionEventSysTable(&c, signal, evntRecId, - UtilPrepareReq::Insert); -} - -void -Dbdict::createEventComplete_RT_USER_CREATE(Signal* signal, 
- OpCreateEventPtr evntRecPtr){ - jam(); - createEvent_sendReply(signal, evntRecPtr); -} - -/********************************************************************* - * - * UTIL_PREPARE, UTIL_EXECUTE - * - * insert or read systable NDB$EVENTS_0 - */ - -void interpretUtilPrepareErrorCode(UtilPrepareRef::ErrorCode errorCode, - bool& temporary, Uint32& line) -{ - switch (errorCode) { - case UtilPrepareRef::NO_ERROR: - jam(); - line = __LINE__; - EVENT_TRACE; - break; - case UtilPrepareRef::PREPARE_SEIZE_ERROR: - jam(); - temporary = true; - line = __LINE__; - EVENT_TRACE; - break; - case UtilPrepareRef::PREPARE_PAGES_SEIZE_ERROR: - jam(); - line = __LINE__; - EVENT_TRACE; - break; - case UtilPrepareRef::PREPARED_OPERATION_SEIZE_ERROR: - jam(); - line = __LINE__; - EVENT_TRACE; - break; - case UtilPrepareRef::DICT_TAB_INFO_ERROR: - jam(); - line = __LINE__; - EVENT_TRACE; - break; - case UtilPrepareRef::MISSING_PROPERTIES_SECTION: - jam(); - line = __LINE__; - EVENT_TRACE; - break; - default: - jam(); - line = __LINE__; - EVENT_TRACE; - break; - } -} - -void -Dbdict::createEventUTIL_PREPARE(Signal* signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - if (returnCode == 0) { - UtilPrepareConf* const req = (UtilPrepareConf*)signal->getDataPtr(); - OpCreateEventPtr evntRecPtr; - jam(); - evntRecPtr.i = req->getSenderData(); - const Uint32 prepareId = req->getPrepareId(); - - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - - Callback c = { safe_cast(&Dbdict::createEventUTIL_EXECUTE), 0 }; - - switch (evntRecPtr.p->m_requestType) { - case CreateEvntReq::RT_USER_GET: -#ifdef EVENT_DEBUG - printf("get type = %d\n", CreateEvntReq::RT_USER_GET); -#endif - jam(); - executeTransEventSysTable(&c, signal, - evntRecPtr.i, evntRecPtr.p->m_eventRec, - prepareId, UtilPrepareReq::Read); - break; - case CreateEvntReq::RT_USER_CREATE: -#ifdef EVENT_DEBUG - printf("create type = %d\n", CreateEvntReq::RT_USER_CREATE); -#endif - { - 
evntRecPtr.p->m_eventRec.EVENT_TYPE = evntRecPtr.p->m_request.getEventType(); - AttributeMask m = evntRecPtr.p->m_request.getAttrListBitmask(); - memcpy(evntRecPtr.p->m_eventRec.ATTRIBUTE_MASK, &m, - sizeof(evntRecPtr.p->m_eventRec.ATTRIBUTE_MASK)); - evntRecPtr.p->m_eventRec.SUBID = evntRecPtr.p->m_request.getEventId(); - evntRecPtr.p->m_eventRec.SUBKEY = evntRecPtr.p->m_request.getEventKey(); - } - jam(); - executeTransEventSysTable(&c, signal, - evntRecPtr.i, evntRecPtr.p->m_eventRec, - prepareId, UtilPrepareReq::Insert); - break; - default: -#ifdef EVENT_DEBUG - printf("type = %d\n", evntRecPtr.p->m_requestType); - printf("bet type = %d\n", CreateEvntReq::RT_USER_GET); - printf("create type = %d\n", CreateEvntReq::RT_USER_CREATE); -#endif - ndbrequire(false); - } - } else { // returnCode != 0 - UtilPrepareRef* const ref = (UtilPrepareRef*)signal->getDataPtr(); - - const UtilPrepareRef::ErrorCode errorCode = - (UtilPrepareRef::ErrorCode)ref->getErrorCode(); - - OpCreateEventPtr evntRecPtr; - evntRecPtr.i = ref->getSenderData(); - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - - bool temporary = false; - interpretUtilPrepareErrorCode(errorCode, - temporary, evntRecPtr.p->m_errorLine); - if (temporary) { - evntRecPtr.p->m_errorCode = - CreateEvntRef::makeTemporary(CreateEvntRef::Undefined); - } - - if (evntRecPtr.p->m_errorCode == 0) { - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - } - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); - } -} - -void Dbdict::executeTransEventSysTable(Callback *pcallback, Signal *signal, - const Uint32 ptrI, - sysTab_NDBEVENTS_0& m_eventRec, - const Uint32 prepareId, - UtilPrepareReq::OperationTypeValue prepReq) -{ - jam(); - const Uint32 noAttr = EVENT_SYSTEM_TABLE_LENGTH; - Uint32 total_len = 0; - - Uint32* attrHdr = signal->theData + 25; - Uint32* attrPtr = attrHdr; - - Uint32 id=0; - // attribute 0 event name: Primary Key - { - 
AttributeHeader::init(attrPtr, id, sysTab_NDBEVENTS_0_szs[id]/4); - total_len += sysTab_NDBEVENTS_0_szs[id]; - attrPtr++; id++; - } - - switch (prepReq) { - case UtilPrepareReq::Read: - jam(); - EVENT_TRACE; - // no more - while ( id < noAttr ) - AttributeHeader::init(attrPtr++, id++, 0); - ndbrequire(id == (Uint32) noAttr); - break; - case UtilPrepareReq::Insert: - jam(); - EVENT_TRACE; - while ( id < noAttr ) { - AttributeHeader::init(attrPtr, id, sysTab_NDBEVENTS_0_szs[id]/4); - total_len += sysTab_NDBEVENTS_0_szs[id]; - attrPtr++; id++; - } - ndbrequire(id == (Uint32) noAttr); - break; - case UtilPrepareReq::Delete: - ndbrequire(id == 1); - break; - default: - ndbrequire(false); - } - - LinearSectionPtr headerPtr; - LinearSectionPtr dataPtr; - - headerPtr.p = attrHdr; - headerPtr.sz = noAttr; - - dataPtr.p = (Uint32*)&m_eventRec; - dataPtr.sz = total_len/4; - - ndbrequire((total_len == sysTab_NDBEVENTS_0_szs[0]) || - (total_len == sizeof(sysTab_NDBEVENTS_0))); - -#if 0 - printf("Header size %u\n", headerPtr.sz); - for(int i = 0; i < (int)headerPtr.sz; i++) - printf("H'%.8x ", attrHdr[i]); - printf("\n"); - - printf("Data size %u\n", dataPtr.sz); - for(int i = 0; i < (int)dataPtr.sz; i++) - printf("H'%.8x ", dataPage[i]); - printf("\n"); -#endif - - executeTransaction(pcallback, signal, - ptrI, - prepareId, - id, - headerPtr, - dataPtr); -} - -void Dbdict::executeTransaction(Callback *pcallback, - Signal* signal, - Uint32 senderData, - Uint32 prepareId, - Uint32 noAttr, - LinearSectionPtr headerPtr, - LinearSectionPtr dataPtr) -{ - jam(); - EVENT_TRACE; - - UtilExecuteReq * utilExecuteReq = - (UtilExecuteReq *)signal->getDataPtrSend(); - - utilExecuteReq->setSenderRef(reference()); - utilExecuteReq->setSenderData(senderData); - utilExecuteReq->setPrepareId(prepareId); - utilExecuteReq->setReleaseFlag(); // must be done after setting prepareId - -#if 0 - printf("Header size %u\n", headerPtr.sz); - for(int i = 0; i < (int)headerPtr.sz; i++) - printf("H'%.8x ", 
headerBuffer[i]); - printf("\n"); - - printf("Data size %u\n", dataPtr.sz); - for(int i = 0; i < (int)dataPtr.sz; i++) - printf("H'%.8x ", dataBuffer[i]); - printf("\n"); -#endif - - struct LinearSectionPtr sectionsPtr[UtilExecuteReq::NoOfSections]; - sectionsPtr[UtilExecuteReq::HEADER_SECTION].p = headerPtr.p; - sectionsPtr[UtilExecuteReq::HEADER_SECTION].sz = noAttr; - sectionsPtr[UtilExecuteReq::DATA_SECTION].p = dataPtr.p; - sectionsPtr[UtilExecuteReq::DATA_SECTION].sz = dataPtr.sz; - - sendSignalUtilReq(pcallback, DBUTIL_REF, GSN_UTIL_EXECUTE_REQ, signal, - UtilExecuteReq::SignalLength, JBB, - sectionsPtr, UtilExecuteReq::NoOfSections); -} - -void Dbdict::parseReadEventSys(Signal* signal, sysTab_NDBEVENTS_0& m_eventRec) -{ - SegmentedSectionPtr headerPtr, dataPtr; - jam(); - signal->getSection(headerPtr, UtilExecuteReq::HEADER_SECTION); - SectionReader headerReader(headerPtr, getSectionSegmentPool()); - - signal->getSection(dataPtr, UtilExecuteReq::DATA_SECTION); - SectionReader dataReader(dataPtr, getSectionSegmentPool()); - - AttributeHeader header; - Uint32 *dst = (Uint32*)&m_eventRec; - - for (int i = 0; i < EVENT_SYSTEM_TABLE_LENGTH; i++) { - headerReader.getWord((Uint32 *)&header); - int sz = header.getDataSize(); - for (int i=0; i < sz; i++) - dataReader.getWord(dst++); - } - - ndbrequire( ((char*)dst-(char*)&m_eventRec) == sizeof(m_eventRec) ); - - releaseSections(signal); -} - -void Dbdict::createEventUTIL_EXECUTE(Signal *signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - if (returnCode == 0) { - // Entry into system table all set - UtilExecuteConf* const conf = (UtilExecuteConf*)signal->getDataPtr(); - jam(); - OpCreateEventPtr evntRecPtr; - evntRecPtr.i = conf->getSenderData(); - - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - OpCreateEvent *evntRec = evntRecPtr.p; - - switch (evntRec->m_requestType) { - case CreateEvntReq::RT_USER_GET: { -#ifdef EVENT_DEBUG - printf("get type = %d\n", 
CreateEvntReq::RT_USER_GET); -#endif - parseReadEventSys(signal, evntRecPtr.p->m_eventRec); - - evntRec->m_request.setEventType(evntRecPtr.p->m_eventRec.EVENT_TYPE); - evntRec->m_request.setAttrListBitmask(*(AttributeMask*)evntRecPtr.p->m_eventRec.ATTRIBUTE_MASK); - evntRec->m_request.setEventId(evntRecPtr.p->m_eventRec.SUBID); - evntRec->m_request.setEventKey(evntRecPtr.p->m_eventRec.SUBKEY); - -#ifdef EVENT_DEBUG - printf("EventName: %s\n", evntRec->m_eventRec.NAME); - printf("TableName: %s\n", evntRec->m_eventRec.TABLE_NAME); -#endif - - // find table id for event table - TableRecord keyRecord; - strcpy(keyRecord.tableName, evntRecPtr.p->m_eventRec.TABLE_NAME); - - TableRecordPtr tablePtr; - c_tableRecordHash.find(tablePtr, keyRecord); - - if (tablePtr.i == RNIL) { - jam(); - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); - return; - } - - evntRec->m_request.setTableId(tablePtr.p->tableId); - - createEventComplete_RT_USER_GET(signal, evntRecPtr); - return; - } - case CreateEvntReq::RT_USER_CREATE: { -#ifdef EVENT_DEBUG - printf("create type = %d\n", CreateEvntReq::RT_USER_CREATE); -#endif - jam(); - createEventComplete_RT_USER_CREATE(signal, evntRecPtr); - return; - } - break; - default: - ndbrequire(false); - } - } else { // returnCode != 0 - UtilExecuteRef * const ref = (UtilExecuteRef *)signal->getDataPtr(); - OpCreateEventPtr evntRecPtr; - evntRecPtr.i = ref->getSenderData(); - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - jam(); - evntRecPtr.p->m_errorNode = reference(); - evntRecPtr.p->m_errorLine = __LINE__; - - switch (ref->getErrorCode()) { - case UtilExecuteRef::TCError: - switch (ref->getTCErrorCode()) { - case ZNOT_FOUND: - jam(); - evntRecPtr.p->m_errorCode = CreateEvntRef::EventNotFound; - break; - case ZALREADYEXIST: - jam(); - evntRecPtr.p->m_errorCode = CreateEvntRef::EventExists; 
- break; - default: - jam(); - evntRecPtr.p->m_errorCode = CreateEvntRef::UndefinedTCError; - break; - } - break; - default: - jam(); - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - break; - } - - createEvent_sendReply(signal, evntRecPtr); - } -} - -/*********************************************************************** - * - * NdbEventOperation, reading systable, creating event in suma - * - */ - -void -Dbdict::createEvent_RT_USER_GET(Signal* signal, OpCreateEventPtr evntRecPtr){ - jam(); - EVENT_TRACE; -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT(Coordinator) got GSN_CREATE_EVNT_REQ::RT_USER_GET evntRecPtr.i = (%d), ref = %u", evntRecPtr.i, evntRecPtr.p->m_request.getUserRef()); -#endif - - SegmentedSectionPtr ssPtr; - - signal->getSection(ssPtr, 0); - - SimplePropertiesSectionReader r0(ssPtr, getSectionSegmentPool()); -#ifdef EVENT_DEBUG - r0.printAll(ndbout); -#endif - if ((!r0.first()) || - (r0.getValueType() != SimpleProperties::StringValue) || - (r0.getValueLen() <= 0)) { - jam(); - releaseSections(signal); - - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); - return; - } - - r0.getString(evntRecPtr.p->m_eventRec.NAME); - int len = strlen(evntRecPtr.p->m_eventRec.NAME); - memset(evntRecPtr.p->m_eventRec.NAME+len, 0, MAX_TAB_NAME_SIZE-len); - - releaseSections(signal); - - Callback c = { safe_cast(&Dbdict::createEventUTIL_PREPARE), 0 }; - - prepareTransactionEventSysTable(&c, signal, evntRecPtr.i, - UtilPrepareReq::Read); - /* - * Will read systable and fill an OpCreateEventPtr - * and return below - */ -} - -void -Dbdict::createEventComplete_RT_USER_GET(Signal* signal, - OpCreateEventPtr evntRecPtr){ - jam(); - - // Send to oneself and the other DICT's - CreateEvntReq * req = (CreateEvntReq *)signal->getDataPtrSend(); - - *req = evntRecPtr.p->m_request; - req->senderRef = reference(); - req->senderData = 
evntRecPtr.i; - - req->addRequestFlag(CreateEvntReq::RT_DICT_AFTER_GET); - -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT(Coordinator) sending GSN_CREATE_EVNT_REQ::RT_DICT_AFTER_GET to DBDICT participants evntRecPtr.i = (%d)", evntRecPtr.i); -#endif - - NodeReceiverGroup rg(DBDICT, c_aliveNodes); - RequestTracker & p = evntRecPtr.p->m_reqTracker; - p.init<CreateEvntRef>(c_counterMgr, rg, GSN_CREATE_EVNT_REF, evntRecPtr.i); - - sendSignal(rg, GSN_CREATE_EVNT_REQ, signal, CreateEvntReq::SignalLength, JBB); -} - -void -Dbdict::createEvent_nodeFailCallback(Signal* signal, Uint32 eventRecPtrI, - Uint32 returnCode){ - OpCreateEventPtr evntRecPtr; - c_opCreateEvent.getPtr(evntRecPtr, eventRecPtrI); - createEvent_sendReply(signal, evntRecPtr); -} - -void Dbdict::execCREATE_EVNT_REF(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - CreateEvntRef * const ref = (CreateEvntRef *)signal->getDataPtr(); - OpCreateEventPtr evntRecPtr; - - evntRecPtr.i = ref->getUserData(); - - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT(Coordinator) got GSN_CREATE_EVNT_REF evntRecPtr.i = (%d)", evntRecPtr.i); -#endif - - if (ref->errorCode == CreateEvntRef::NF_FakeErrorREF){ - jam(); - evntRecPtr.p->m_reqTracker.ignoreRef(c_counterMgr, refToNode(ref->senderRef)); - } else { - jam(); - evntRecPtr.p->m_reqTracker.reportRef(c_counterMgr, refToNode(ref->senderRef)); - } - createEvent_sendReply(signal, evntRecPtr); - - return; -} - -void Dbdict::execCREATE_EVNT_CONF(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - CreateEvntConf * const conf = (CreateEvntConf *)signal->getDataPtr(); - OpCreateEventPtr evntRecPtr; - - evntRecPtr.i = conf->getUserData(); - - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT(Coordinator) got GSN_CREATE_EVNT_CONF evntRecPtr.i = (%d)", evntRecPtr.i); -#endif - - evntRecPtr.p->m_reqTracker.reportConf(c_counterMgr, 
refToNode(conf->senderRef)); - - // we will only have a valid tablename if it the master DICT sending this - // but that's ok - LinearSectionPtr ptr[1]; - ptr[0].p = (Uint32 *)evntRecPtr.p->m_eventRec.TABLE_NAME; - ptr[0].sz = - (strlen(evntRecPtr.p->m_eventRec.TABLE_NAME)+4)/4; // to make sure we have a null - - createEvent_sendReply(signal, evntRecPtr, ptr, 1); - - return; -} - -/************************************************ - * - * Participant stuff - * - */ - -void -Dbdict::createEvent_RT_DICT_AFTER_GET(Signal* signal, OpCreateEventPtr evntRecPtr){ - jam(); - evntRecPtr.p->m_request.setUserRef(signal->senderBlockRef()); - -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT(Participant) got CREATE_EVNT_REQ::RT_DICT_AFTER_GET evntRecPtr.i = (%d)", evntRecPtr.i); -#endif - - // the signal comes from the DICT block that got the first user request! - // This code runs on all DICT nodes, including oneself - - // Seize a Create Event record, the Coordinator will now have two seized - // but that's ok, it's like a recursion - - SubCreateReq * sumaReq = (SubCreateReq *)signal->getDataPtrSend(); - - sumaReq->subscriberRef = reference(); // reference to DICT - sumaReq->subscriberData = evntRecPtr.i; - sumaReq->subscriptionId = evntRecPtr.p->m_request.getEventId(); - sumaReq->subscriptionKey = evntRecPtr.p->m_request.getEventKey(); - sumaReq->subscriptionType = SubCreateReq::TableEvent; - sumaReq->tableId = evntRecPtr.p->m_request.getTableId(); - -#ifdef EVENT_PH2_DEBUG - ndbout_c("sending GSN_SUB_CREATE_REQ"); -#endif - - sendSignal(SUMA_REF, GSN_SUB_CREATE_REQ, signal, - SubCreateReq::SignalLength+1 /*to get table Id*/, JBB); -} - -void Dbdict::execSUB_CREATE_REF(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - SubCreateRef * const ref = (SubCreateRef *)signal->getDataPtr(); - OpCreateEventPtr evntRecPtr; - - evntRecPtr.i = ref->subscriberData; - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - -#ifdef EVENT_PH2_DEBUG - 
ndbout_c("DBDICT(Participant) got SUB_CREATE_REF evntRecPtr.i = (%d)", evntRecPtr.i); -#endif - - if (ref->err == GrepError::SUBSCRIPTION_ID_NOT_UNIQUE) { - jam(); -#ifdef EVENT_PH2_DEBUG - ndbout_c("SUBSCRIPTION_ID_NOT_UNIQUE"); -#endif - createEvent_sendReply(signal, evntRecPtr); - return; - } - -#ifdef EVENT_PH2_DEBUG - ndbout_c("Other error"); -#endif - - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); -} - -void Dbdict::execSUB_CREATE_CONF(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - - SubCreateConf * const sumaConf = (SubCreateConf *)signal->getDataPtr(); - - const Uint32 subscriptionId = sumaConf->subscriptionId; - const Uint32 subscriptionKey = sumaConf->subscriptionKey; - const Uint32 evntRecId = sumaConf->subscriberData; - - OpCreateEvent *evntRec; - ndbrequire((evntRec = c_opCreateEvent.getPtr(evntRecId)) != NULL); - -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT(Participant) got SUB_CREATE_CONF evntRecPtr.i = (%d)", evntRecId); -#endif - - SubSyncReq *sumaSync = (SubSyncReq *)signal->getDataPtrSend(); - - sumaSync->subscriptionId = subscriptionId; - sumaSync->subscriptionKey = subscriptionKey; - sumaSync->part = (Uint32) SubscriptionData::MetaData; - sumaSync->subscriberData = evntRecId; - - sendSignal(SUMA_REF, GSN_SUB_SYNC_REQ, signal, - SubSyncReq::SignalLength, JBB); -} - -void Dbdict::execSUB_SYNC_REF(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - SubSyncRef * const ref = (SubSyncRef *)signal->getDataPtr(); - OpCreateEventPtr evntRecPtr; - - evntRecPtr.i = ref->subscriberData; - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - createEvent_sendReply(signal, evntRecPtr); -} - -void Dbdict::execSUB_SYNC_CONF(Signal* signal) -{ - jamEntry(); - 
EVENT_TRACE; - - SubSyncConf * const sumaSyncConf = (SubSyncConf *)signal->getDataPtr(); - - // Uint32 subscriptionId = sumaSyncConf->subscriptionId; - // Uint32 subscriptionKey = sumaSyncConf->subscriptionKey; - OpCreateEventPtr evntRecPtr; - - evntRecPtr.i = sumaSyncConf->subscriberData; - ndbrequire((evntRecPtr.p = c_opCreateEvent.getPtr(evntRecPtr.i)) != NULL); - - ndbrequire(sumaSyncConf->part == (Uint32)SubscriptionData::MetaData); - - createEvent_sendReply(signal, evntRecPtr); -} - -/**************************************************** - * - * common create reply method - * - *******************************************************/ - -void Dbdict::createEvent_sendReply(Signal* signal, - OpCreateEventPtr evntRecPtr, - LinearSectionPtr *ptr, int noLSP) -{ - jam(); - EVENT_TRACE; - - // check if we're ready to sent reply - // if we are the master dict we might be waiting for conf/ref - - if (!evntRecPtr.p->m_reqTracker.done()) { - jam(); - return; // there's more to come - } - - if (evntRecPtr.p->m_reqTracker.hasRef()) { - ptr = NULL; // we don't want to return anything if there's an error - if (!evntRecPtr.p->hasError()) { - evntRecPtr.p->m_errorCode = CreateEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - jam(); - } else - jam(); - } - - // reference to API if master DICT - // else reference to master DICT - Uint32 senderRef = evntRecPtr.p->m_request.getUserRef(); - Uint32 signalLength; - Uint32 gsn; - - if (evntRecPtr.p->hasError()) { - jam(); - EVENT_TRACE; - CreateEvntRef * ret = (CreateEvntRef *)signal->getDataPtrSend(); - - ret->setEventId(evntRecPtr.p->m_request.getEventId()); - ret->setEventKey(evntRecPtr.p->m_request.getEventKey()); - ret->setUserData(evntRecPtr.p->m_request.getUserData()); - ret->senderRef = reference(); - ret->setTableId(evntRecPtr.p->m_request.getTableId()); - ret->setEventType(evntRecPtr.p->m_request.getEventType()); - 
ret->setRequestType(evntRecPtr.p->m_request.getRequestType()); - - ret->setErrorCode(evntRecPtr.p->m_errorCode); - ret->setErrorLine(evntRecPtr.p->m_errorLine); - ret->setErrorNode(evntRecPtr.p->m_errorNode); - - signalLength = CreateEvntRef::SignalLength; -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT sending GSN_CREATE_EVNT_REF to evntRecPtr.i = (%d) node = %u ref = %u", evntRecPtr.i, refToNode(senderRef), senderRef); - ndbout_c("errorCode = %u", evntRecPtr.p->m_errorCode); - ndbout_c("errorLine = %u", evntRecPtr.p->m_errorLine); -#endif - gsn = GSN_CREATE_EVNT_REF; - - } else { - jam(); - EVENT_TRACE; - CreateEvntConf * evntConf = (CreateEvntConf *)signal->getDataPtrSend(); - - evntConf->setEventId(evntRecPtr.p->m_request.getEventId()); - evntConf->setEventKey(evntRecPtr.p->m_request.getEventKey()); - evntConf->setUserData(evntRecPtr.p->m_request.getUserData()); - evntConf->senderRef = reference(); - evntConf->setTableId(evntRecPtr.p->m_request.getTableId()); - evntConf->setAttrListBitmask(evntRecPtr.p->m_request.getAttrListBitmask()); - evntConf->setEventType(evntRecPtr.p->m_request.getEventType()); - evntConf->setRequestType(evntRecPtr.p->m_request.getRequestType()); - - signalLength = CreateEvntConf::SignalLength; -#ifdef EVENT_PH2_DEBUG - ndbout_c("DBDICT sending GSN_CREATE_EVNT_CONF to evntRecPtr.i = (%d) node = %u ref = %u", evntRecPtr.i, refToNode(senderRef), senderRef); -#endif - gsn = GSN_CREATE_EVNT_CONF; - } - - if (ptr) { - jam(); - sendSignal(senderRef, gsn, signal, signalLength, JBB, ptr, noLSP); - } else { - jam(); - sendSignal(senderRef, gsn, signal, signalLength, JBB); - } - - c_opCreateEvent.release(evntRecPtr); -} - -/*************************************************************/ - -/******************************************************************** - * - * Start event - * - *******************************************************************/ - -void Dbdict::execSUB_START_REQ(Signal* signal) -{ - jamEntry(); - - Uint32 origSenderRef = 
signal->senderBlockRef(); - - OpSubEventPtr subbPtr; - if (!c_opSubEvent.seize(subbPtr)) { - SubStartRef * ref = (SubStartRef *)signal->getDataPtrSend(); - { // fix - Uint32 subcriberRef = ((SubStartReq*)signal->getDataPtr())->subscriberRef; - ref->subscriberRef = subcriberRef; - } - jam(); - // ret->setErrorCode(SubStartRef::SeizeError); - // ret->setErrorLine(__LINE__); - // ret->setErrorNode(reference()); - ref->senderRef = reference(); - ref->setTemporary(SubStartRef::Busy); - - sendSignal(origSenderRef, GSN_SUB_START_REF, signal, - SubStartRef::SignalLength2, JBB); - return; - } - - { - const SubStartReq* req = (SubStartReq*) signal->getDataPtr(); - subbPtr.p->m_senderRef = req->senderRef; - subbPtr.p->m_senderData = req->senderData; - subbPtr.p->m_errorCode = 0; - } - - if (refToBlock(origSenderRef) != DBDICT) { - /* - * Coordinator - */ - jam(); - - subbPtr.p->m_senderRef = origSenderRef; // not sure if API sets correctly - NodeReceiverGroup rg(DBDICT, c_aliveNodes); - RequestTracker & p = subbPtr.p->m_reqTracker; - p.init<SubStartRef>(c_counterMgr, rg, GSN_SUB_START_REF, subbPtr.i); - - SubStartReq* req = (SubStartReq*) signal->getDataPtrSend(); - - req->senderRef = reference(); - req->senderData = subbPtr.i; - -#ifdef EVENT_PH3_DEBUG - ndbout_c("DBDICT(Coordinator) sending GSN_SUB_START_REQ to DBDICT participants subbPtr.i = (%d)", subbPtr.i); -#endif - - sendSignal(rg, GSN_SUB_START_REQ, signal, SubStartReq::SignalLength2, JBB); - return; - } - /* - * Participant - */ - ndbrequire(refToBlock(origSenderRef) == DBDICT); - - { - SubStartReq* req = (SubStartReq*) signal->getDataPtrSend(); - - req->senderRef = reference(); - req->senderData = subbPtr.i; - -#ifdef EVENT_PH3_DEBUG - ndbout_c("DBDICT(Participant) sending GSN_SUB_START_REQ to SUMA subbPtr.i = (%d)", subbPtr.i); -#endif - sendSignal(SUMA_REF, GSN_SUB_START_REQ, signal, SubStartReq::SignalLength2, JBB); - } -} - -void Dbdict::execSUB_START_REF(Signal* signal) -{ - jamEntry(); - - const SubStartRef* 
ref = (SubStartRef*) signal->getDataPtr(); - Uint32 senderRef = ref->senderRef; - - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, ref->senderData); - - if (refToBlock(senderRef) == SUMA) { - /* - * Participant - */ - jam(); - -#ifdef EVENT_PH3_DEBUG - ndbout_c("DBDICT(Participant) got GSN_SUB_START_REF = (%d)", subbPtr.i); -#endif - - if (ref->isTemporary()){ - jam(); - SubStartReq* req = (SubStartReq*)signal->getDataPtrSend(); - { // fix - Uint32 subscriberRef = ref->subscriberRef; - req->subscriberRef = subscriberRef; - } - req->senderRef = reference(); - req->senderData = subbPtr.i; - sendSignal(SUMA_REF, GSN_SUB_START_REQ, - signal, SubStartReq::SignalLength2, JBB); - } else { - jam(); - - SubStartRef* ref = (SubStartRef*) signal->getDataPtrSend(); - ref->senderRef = reference(); - ref->senderData = subbPtr.p->m_senderData; - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_REF, - signal, SubStartRef::SignalLength2, JBB); - c_opSubEvent.release(subbPtr); - } - return; - } - /* - * Coordinator - */ - ndbrequire(refToBlock(senderRef) == DBDICT); -#ifdef EVENT_PH3_DEBUG - ndbout_c("DBDICT(Coordinator) got GSN_SUB_START_REF = (%d)", subbPtr.i); -#endif - if (ref->errorCode == SubStartRef::NF_FakeErrorREF){ - jam(); - subbPtr.p->m_reqTracker.ignoreRef(c_counterMgr, refToNode(senderRef)); - } else { - jam(); - subbPtr.p->m_reqTracker.reportRef(c_counterMgr, refToNode(senderRef)); - } - completeSubStartReq(signal,subbPtr.i,0); -} - -void Dbdict::execSUB_START_CONF(Signal* signal) -{ - jamEntry(); - - const SubStartConf* conf = (SubStartConf*) signal->getDataPtr(); - Uint32 senderRef = conf->senderRef; - - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, conf->senderData); - - if (refToBlock(senderRef) == SUMA) { - /* - * Participant - */ - jam(); - SubStartConf* conf = (SubStartConf*) signal->getDataPtrSend(); - -#ifdef EVENT_PH3_DEBUG - ndbout_c("DBDICT(Participant) got GSN_SUB_START_CONF = (%d)", subbPtr.i); -#endif - - conf->senderRef = reference(); - 
conf->senderData = subbPtr.p->m_senderData; - - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_CONF, - signal, SubStartConf::SignalLength2, JBB); - c_opSubEvent.release(subbPtr); - return; - } - /* - * Coordinator - */ - ndbrequire(refToBlock(senderRef) == DBDICT); -#ifdef EVENT_PH3_DEBUG - ndbout_c("DBDICT(Coordinator) got GSN_SUB_START_CONF = (%d)", subbPtr.i); -#endif - subbPtr.p->m_reqTracker.reportConf(c_counterMgr, refToNode(senderRef)); - completeSubStartReq(signal,subbPtr.i,0); -} - -/* - * Coordinator - */ -void Dbdict::completeSubStartReq(Signal* signal, - Uint32 ptrI, - Uint32 returnCode){ - jam(); - - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, ptrI); - - if (!subbPtr.p->m_reqTracker.done()){ - jam(); - return; - } - - if (subbPtr.p->m_reqTracker.hasRef()) { - jam(); -#ifdef EVENT_DEBUG - ndbout_c("SUB_START_REF"); -#endif - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_REF, - signal, SubStartRef::SignalLength, JBB); - if (subbPtr.p->m_reqTracker.hasConf()) { - // stopStartedNodes(signal); - } - c_opSubEvent.release(subbPtr); - return; - } -#ifdef EVENT_DEBUG - ndbout_c("SUB_START_CONF"); -#endif - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_CONF, - signal, SubStartConf::SignalLength, JBB); - c_opSubEvent.release(subbPtr); -} - -/******************************************************************** - * - * Stop event - * - *******************************************************************/ - -void Dbdict::execSUB_STOP_REQ(Signal* signal) -{ - jamEntry(); - - Uint32 origSenderRef = signal->senderBlockRef(); - - OpSubEventPtr subbPtr; - if (!c_opSubEvent.seize(subbPtr)) { - SubStopRef * ref = (SubStopRef *)signal->getDataPtrSend(); - jam(); - // ret->setErrorCode(SubStartRef::SeizeError); - // ret->setErrorLine(__LINE__); - // ret->setErrorNode(reference()); - ref->senderRef = reference(); - ref->setTemporary(SubStopRef::Busy); - - sendSignal(origSenderRef, GSN_SUB_STOP_REF, signal, - SubStopRef::SignalLength, JBB); - return; - } - - { 
- const SubStopReq* req = (SubStopReq*) signal->getDataPtr(); - subbPtr.p->m_senderRef = req->senderRef; - subbPtr.p->m_senderData = req->senderData; - subbPtr.p->m_errorCode = 0; - } - - if (refToBlock(origSenderRef) != DBDICT) { - /* - * Coordinator - */ - jam(); -#ifdef EVENT_DEBUG - ndbout_c("SUB_STOP_REQ 1"); -#endif - subbPtr.p->m_senderRef = origSenderRef; // not sure if API sets correctly - NodeReceiverGroup rg(DBDICT, c_aliveNodes); - RequestTracker & p = subbPtr.p->m_reqTracker; - p.init<SubStopRef>(c_counterMgr, rg, GSN_SUB_STOP_REF, subbPtr.i); - - SubStopReq* req = (SubStopReq*) signal->getDataPtrSend(); - - req->senderRef = reference(); - req->senderData = subbPtr.i; - - sendSignal(rg, GSN_SUB_STOP_REQ, signal, SubStopReq::SignalLength, JBB); - return; - } - /* - * Participant - */ -#ifdef EVENT_DEBUG - ndbout_c("SUB_STOP_REQ 2"); -#endif - ndbrequire(refToBlock(origSenderRef) == DBDICT); - { - SubStopReq* req = (SubStopReq*) signal->getDataPtrSend(); - - req->senderRef = reference(); - req->senderData = subbPtr.i; - - sendSignal(SUMA_REF, GSN_SUB_STOP_REQ, signal, SubStopReq::SignalLength, JBB); - } -} - -void Dbdict::execSUB_STOP_REF(Signal* signal) -{ - jamEntry(); - const SubStopRef* ref = (SubStopRef*) signal->getDataPtr(); - Uint32 senderRef = ref->senderRef; - - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, ref->senderData); - - if (refToBlock(senderRef) == SUMA) { - /* - * Participant - */ - jam(); - if (ref->isTemporary()){ - jam(); - SubStopReq* req = (SubStopReq*)signal->getDataPtrSend(); - req->senderRef = reference(); - req->senderData = subbPtr.i; - sendSignal(SUMA_REF, GSN_SUB_STOP_REQ, - signal, SubStopReq::SignalLength, JBB); - } else { - jam(); - SubStopRef* ref = (SubStopRef*) signal->getDataPtrSend(); - ref->senderRef = reference(); - ref->senderData = subbPtr.p->m_senderData; - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_REF, - signal, SubStopRef::SignalLength, JBB); - c_opSubEvent.release(subbPtr); - } - return; - } - 
/* - * Coordinator - */ - ndbrequire(refToBlock(senderRef) == DBDICT); - if (ref->errorCode == SubStopRef::NF_FakeErrorREF){ - jam(); - subbPtr.p->m_reqTracker.ignoreRef(c_counterMgr, refToNode(senderRef)); - } else { - jam(); - subbPtr.p->m_reqTracker.reportRef(c_counterMgr, refToNode(senderRef)); - } - completeSubStopReq(signal,subbPtr.i,0); -} - -void Dbdict::execSUB_STOP_CONF(Signal* signal) -{ - jamEntry(); - - const SubStopConf* conf = (SubStopConf*) signal->getDataPtr(); - Uint32 senderRef = conf->senderRef; - - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, conf->senderData); - - if (refToBlock(senderRef) == SUMA) { - /* - * Participant - */ - jam(); - SubStopConf* conf = (SubStopConf*) signal->getDataPtrSend(); - - conf->senderRef = reference(); - conf->senderData = subbPtr.p->m_senderData; - - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_CONF, - signal, SubStopConf::SignalLength, JBB); - c_opSubEvent.release(subbPtr); - return; - } - /* - * Coordinator - */ - ndbrequire(refToBlock(senderRef) == DBDICT); - subbPtr.p->m_reqTracker.reportConf(c_counterMgr, refToNode(senderRef)); - completeSubStopReq(signal,subbPtr.i,0); -} - -/* - * Coordinator - */ -void Dbdict::completeSubStopReq(Signal* signal, - Uint32 ptrI, - Uint32 returnCode){ - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, ptrI); - - if (!subbPtr.p->m_reqTracker.done()){ - jam(); - return; - } - - if (subbPtr.p->m_reqTracker.hasRef()) { - jam(); -#ifdef EVENT_DEBUG - ndbout_c("SUB_STOP_REF"); -#endif - SubStopRef* ref = (SubStopRef*)signal->getDataPtrSend(); - - ref->senderRef = reference(); - ref->senderData = subbPtr.p->m_senderData; - /* - ref->subscriptionId = subbPtr.p->m_senderData; - ref->subscriptionKey = subbPtr.p->m_senderData; - ref->part = subbPtr.p->m_part; // SubscriptionData::Part - ref->subscriberData = subbPtr.p->m_subscriberData; - ref->subscriberRef = subbPtr.p->m_subscriberRef; - */ - ref->errorCode = subbPtr.p->m_errorCode; - - - 
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_REF, - signal, SubStopRef::SignalLength, JBB); - if (subbPtr.p->m_reqTracker.hasConf()) { - // stopStartedNodes(signal); - } - c_opSubEvent.release(subbPtr); - return; - } -#ifdef EVENT_DEBUG - ndbout_c("SUB_STOP_CONF"); -#endif - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_CONF, - signal, SubStopConf::SignalLength, JBB); - c_opSubEvent.release(subbPtr); -} - -/*************************************************************** - * MODULE: Drop event. - * - * Drop event. - * - * TODO - */ - -void -Dbdict::execDROP_EVNT_REQ(Signal* signal) -{ - jamEntry(); - EVENT_TRACE; - - DropEvntReq *req = (DropEvntReq*)signal->getDataPtr(); - const Uint32 senderRef = signal->senderBlockRef(); - OpDropEventPtr evntRecPtr; - - // Seize a Create Event record - if (!c_opDropEvent.seize(evntRecPtr)) { - // Failed to allocate event record - jam(); - releaseSections(signal); - - DropEvntRef * ret = (DropEvntRef *)signal->getDataPtrSend(); - ret->setErrorCode(DropEvntRef::SeizeError); - ret->setErrorLine(__LINE__); - ret->setErrorNode(reference()); - sendSignal(senderRef, GSN_DROP_EVNT_REF, signal, - DropEvntRef::SignalLength, JBB); - return; - } - -#ifdef EVENT_DEBUG - ndbout_c("DBDICT::execDROP_EVNT_REQ evntRecId = (%d)", evntRecPtr.i); -#endif - - OpDropEvent* evntRec = evntRecPtr.p; - evntRec->init(req); - - SegmentedSectionPtr ssPtr; - - signal->getSection(ssPtr, 0); - - SimplePropertiesSectionReader r0(ssPtr, getSectionSegmentPool()); -#ifdef EVENT_DEBUG - r0.printAll(ndbout); -#endif - // event name - if ((!r0.first()) || - (r0.getValueType() != SimpleProperties::StringValue) || - (r0.getValueLen() <= 0)) { - jam(); - releaseSections(signal); - - evntRecPtr.p->m_errorCode = DropEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorNode = reference(); - - dropEvent_sendReply(signal, evntRecPtr); - return; - } - r0.getString(evntRecPtr.p->m_eventRec.NAME); - { - int len = 
strlen(evntRecPtr.p->m_eventRec.NAME); - memset(evntRecPtr.p->m_eventRec.NAME+len, 0, MAX_TAB_NAME_SIZE-len); -#ifdef EVENT_DEBUG - printf("DropEvntReq; EventName %s, len %u\n", - evntRecPtr.p->m_eventRec.NAME, len); - for(int i = 0; i < MAX_TAB_NAME_SIZE/4; i++) - printf("H'%.8x ", ((Uint32*)evntRecPtr.p->m_eventRec.NAME)[i]); - printf("\n"); -#endif - } - - releaseSections(signal); - - Callback c = { safe_cast(&Dbdict::dropEventUTIL_PREPARE_READ), 0 }; - - prepareTransactionEventSysTable(&c, signal, evntRecPtr.i, - UtilPrepareReq::Read); -} - -void -Dbdict::dropEventUTIL_PREPARE_READ(Signal* signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - if (returnCode != 0) { - EVENT_TRACE; - dropEventUtilPrepareRef(signal, callbackData, returnCode); - return; - } - - UtilPrepareConf* const req = (UtilPrepareConf*)signal->getDataPtr(); - OpDropEventPtr evntRecPtr; - evntRecPtr.i = req->getSenderData(); - const Uint32 prepareId = req->getPrepareId(); - - ndbrequire((evntRecPtr.p = c_opDropEvent.getPtr(evntRecPtr.i)) != NULL); - - Callback c = { safe_cast(&Dbdict::dropEventUTIL_EXECUTE_READ), 0 }; - - executeTransEventSysTable(&c, signal, - evntRecPtr.i, evntRecPtr.p->m_eventRec, - prepareId, UtilPrepareReq::Read); -} - -void -Dbdict::dropEventUTIL_EXECUTE_READ(Signal* signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - if (returnCode != 0) { - EVENT_TRACE; - dropEventUtilExecuteRef(signal, callbackData, returnCode); - return; - } - - OpDropEventPtr evntRecPtr; - UtilExecuteConf * const ref = (UtilExecuteConf *)signal->getDataPtr(); - jam(); - evntRecPtr.i = ref->getSenderData(); - ndbrequire((evntRecPtr.p = c_opDropEvent.getPtr(evntRecPtr.i)) != NULL); - - parseReadEventSys(signal, evntRecPtr.p->m_eventRec); - - NodeReceiverGroup rg(DBDICT, c_aliveNodes); - RequestTracker & p = evntRecPtr.p->m_reqTracker; - p.init<SubRemoveRef>(c_counterMgr, rg, GSN_SUB_REMOVE_REF, - evntRecPtr.i); - - SubRemoveReq* req = 
(SubRemoveReq*) signal->getDataPtrSend(); - - req->senderRef = reference(); - req->senderData = evntRecPtr.i; - req->subscriptionId = evntRecPtr.p->m_eventRec.SUBID; - req->subscriptionKey = evntRecPtr.p->m_eventRec.SUBKEY; - - sendSignal(rg, GSN_SUB_REMOVE_REQ, signal, SubRemoveReq::SignalLength, JBB); -} - -/* - * Participant - */ - -void -Dbdict::execSUB_REMOVE_REQ(Signal* signal) -{ - jamEntry(); - - Uint32 origSenderRef = signal->senderBlockRef(); - - OpSubEventPtr subbPtr; - if (!c_opSubEvent.seize(subbPtr)) { - SubRemoveRef * ref = (SubRemoveRef *)signal->getDataPtrSend(); - jam(); - ref->senderRef = reference(); - ref->setTemporary(SubRemoveRef::Busy); - - sendSignal(origSenderRef, GSN_SUB_REMOVE_REF, signal, - SubRemoveRef::SignalLength, JBB); - return; - } - - { - const SubRemoveReq* req = (SubRemoveReq*) signal->getDataPtr(); - subbPtr.p->m_senderRef = req->senderRef; - subbPtr.p->m_senderData = req->senderData; - subbPtr.p->m_errorCode = 0; - } - - SubRemoveReq* req = (SubRemoveReq*) signal->getDataPtrSend(); - req->senderRef = reference(); - req->senderData = subbPtr.i; - - sendSignal(SUMA_REF, GSN_SUB_REMOVE_REQ, signal, SubRemoveReq::SignalLength, JBB); -} - -/* - * Coordintor/Participant - */ - -void -Dbdict::execSUB_REMOVE_REF(Signal* signal) -{ - jamEntry(); - const SubRemoveRef* ref = (SubRemoveRef*) signal->getDataPtr(); - Uint32 senderRef = ref->senderRef; - - if (refToBlock(senderRef) == SUMA) { - /* - * Participant - */ - jam(); - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, ref->senderData); - if (ref->errorCode == (Uint32) GrepError::SUBSCRIPTION_ID_NOT_FOUND) { - // conf this since this may occur if a nodefailiure has occured - // earlier so that the systable was not cleared - SubRemoveConf* conf = (SubRemoveConf*) signal->getDataPtrSend(); - conf->senderRef = reference(); - conf->senderData = subbPtr.p->m_senderData; - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_REMOVE_CONF, - signal, SubRemoveConf::SignalLength, JBB); - } else { 
- SubRemoveRef* ref = (SubRemoveRef*) signal->getDataPtrSend(); - ref->senderRef = reference(); - ref->senderData = subbPtr.p->m_senderData; - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_REMOVE_REF, - signal, SubRemoveRef::SignalLength, JBB); - } - c_opSubEvent.release(subbPtr); - return; - } - /* - * Coordinator - */ - ndbrequire(refToBlock(senderRef) == DBDICT); - OpDropEventPtr eventRecPtr; - c_opDropEvent.getPtr(eventRecPtr, ref->senderData); - if (ref->errorCode == SubRemoveRef::NF_FakeErrorREF){ - jam(); - eventRecPtr.p->m_reqTracker.ignoreRef(c_counterMgr, refToNode(senderRef)); - } else { - jam(); - eventRecPtr.p->m_reqTracker.reportRef(c_counterMgr, refToNode(senderRef)); - } - completeSubRemoveReq(signal,eventRecPtr.i,0); -} - -void -Dbdict::execSUB_REMOVE_CONF(Signal* signal) -{ - jamEntry(); - const SubRemoveConf* conf = (SubRemoveConf*) signal->getDataPtr(); - Uint32 senderRef = conf->senderRef; - - if (refToBlock(senderRef) == SUMA) { - /* - * Participant - */ - jam(); - OpSubEventPtr subbPtr; - c_opSubEvent.getPtr(subbPtr, conf->senderData); - SubRemoveConf* conf = (SubRemoveConf*) signal->getDataPtrSend(); - conf->senderRef = reference(); - conf->senderData = subbPtr.p->m_senderData; - sendSignal(subbPtr.p->m_senderRef, GSN_SUB_REMOVE_CONF, - signal, SubRemoveConf::SignalLength, JBB); - c_opSubEvent.release(subbPtr); - return; - } - /* - * Coordinator - */ - ndbrequire(refToBlock(senderRef) == DBDICT); - OpDropEventPtr eventRecPtr; - c_opDropEvent.getPtr(eventRecPtr, conf->senderData); - eventRecPtr.p->m_reqTracker.reportConf(c_counterMgr, refToNode(senderRef)); - completeSubRemoveReq(signal,eventRecPtr.i,0); -} - -void -Dbdict::completeSubRemoveReq(Signal* signal, Uint32 ptrI, Uint32 xxx) -{ - OpDropEventPtr evntRecPtr; - c_opDropEvent.getPtr(evntRecPtr, ptrI); - - if (!evntRecPtr.p->m_reqTracker.done()){ - jam(); - return; - } - - if (evntRecPtr.p->m_reqTracker.hasRef()) { - jam(); - evntRecPtr.p->m_errorNode = reference(); - 
evntRecPtr.p->m_errorLine = __LINE__; - evntRecPtr.p->m_errorCode = DropEvntRef::Undefined; - dropEvent_sendReply(signal, evntRecPtr); - return; - } - - Callback c = { safe_cast(&Dbdict::dropEventUTIL_PREPARE_DELETE), 0 }; - - prepareTransactionEventSysTable(&c, signal, evntRecPtr.i, - UtilPrepareReq::Delete); -} - -void -Dbdict::dropEventUTIL_PREPARE_DELETE(Signal* signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - if (returnCode != 0) { - EVENT_TRACE; - dropEventUtilPrepareRef(signal, callbackData, returnCode); - return; - } - - UtilPrepareConf* const req = (UtilPrepareConf*)signal->getDataPtr(); - OpDropEventPtr evntRecPtr; - jam(); - evntRecPtr.i = req->getSenderData(); - const Uint32 prepareId = req->getPrepareId(); - - ndbrequire((evntRecPtr.p = c_opDropEvent.getPtr(evntRecPtr.i)) != NULL); -#ifdef EVENT_DEBUG - printf("DropEvntUTIL_PREPARE; evntRecPtr.i len %u\n",evntRecPtr.i); -#endif - - Callback c = { safe_cast(&Dbdict::dropEventUTIL_EXECUTE_DELETE), 0 }; - - executeTransEventSysTable(&c, signal, - evntRecPtr.i, evntRecPtr.p->m_eventRec, - prepareId, UtilPrepareReq::Delete); -} - -void -Dbdict::dropEventUTIL_EXECUTE_DELETE(Signal* signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - if (returnCode != 0) { - EVENT_TRACE; - dropEventUtilExecuteRef(signal, callbackData, returnCode); - return; - } - - OpDropEventPtr evntRecPtr; - UtilExecuteConf * const ref = (UtilExecuteConf *)signal->getDataPtr(); - jam(); - evntRecPtr.i = ref->getSenderData(); - ndbrequire((evntRecPtr.p = c_opDropEvent.getPtr(evntRecPtr.i)) != NULL); - - dropEvent_sendReply(signal, evntRecPtr); -} - -void -Dbdict::dropEventUtilPrepareRef(Signal* signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - UtilPrepareRef * const ref = (UtilPrepareRef *)signal->getDataPtr(); - OpDropEventPtr evntRecPtr; - evntRecPtr.i = ref->getSenderData(); - ndbrequire((evntRecPtr.p = c_opDropEvent.getPtr(evntRecPtr.i)) != NULL); 
- - bool temporary = false; - interpretUtilPrepareErrorCode((UtilPrepareRef::ErrorCode)ref->getErrorCode(), - temporary, evntRecPtr.p->m_errorLine); - if (temporary) { - evntRecPtr.p->m_errorCode = (DropEvntRef::ErrorCode) - ((Uint32) DropEvntRef::Undefined | (Uint32) DropEvntRef::Temporary); - } - - if (evntRecPtr.p->m_errorCode == 0) { - evntRecPtr.p->m_errorCode = DropEvntRef::Undefined; - evntRecPtr.p->m_errorLine = __LINE__; - } - evntRecPtr.p->m_errorNode = reference(); - - dropEvent_sendReply(signal, evntRecPtr); -} - -void -Dbdict::dropEventUtilExecuteRef(Signal* signal, - Uint32 callbackData, - Uint32 returnCode) -{ - jam(); - EVENT_TRACE; - OpDropEventPtr evntRecPtr; - UtilExecuteRef * const ref = (UtilExecuteRef *)signal->getDataPtr(); - jam(); - evntRecPtr.i = ref->getSenderData(); - ndbrequire((evntRecPtr.p = c_opDropEvent.getPtr(evntRecPtr.i)) != NULL); - - evntRecPtr.p->m_errorNode = reference(); - evntRecPtr.p->m_errorLine = __LINE__; - - switch (ref->getErrorCode()) { - case UtilExecuteRef::TCError: - switch (ref->getTCErrorCode()) { - case ZNOT_FOUND: - jam(); - evntRecPtr.p->m_errorCode = DropEvntRef::EventNotFound; - break; - default: - jam(); - evntRecPtr.p->m_errorCode = DropEvntRef::UndefinedTCError; - break; - } - break; - default: - jam(); - evntRecPtr.p->m_errorCode = DropEvntRef::Undefined; - break; - } - dropEvent_sendReply(signal, evntRecPtr); -} - -void Dbdict::dropEvent_sendReply(Signal* signal, - OpDropEventPtr evntRecPtr) -{ - jam(); - EVENT_TRACE; - Uint32 senderRef = evntRecPtr.p->m_request.getUserRef(); - - if (evntRecPtr.p->hasError()) { - jam(); - DropEvntRef * ret = (DropEvntRef *)signal->getDataPtrSend(); - - ret->setUserData(evntRecPtr.p->m_request.getUserData()); - ret->setUserRef(evntRecPtr.p->m_request.getUserRef()); - - ret->setErrorCode(evntRecPtr.p->m_errorCode); - ret->setErrorLine(evntRecPtr.p->m_errorLine); - ret->setErrorNode(evntRecPtr.p->m_errorNode); - - sendSignal(senderRef, GSN_DROP_EVNT_REF, signal, - 
DropEvntRef::SignalLength, JBB); - } else { - jam(); - DropEvntConf * evntConf = (DropEvntConf *)signal->getDataPtrSend(); - - evntConf->setUserData(evntRecPtr.p->m_request.getUserData()); - evntConf->setUserRef(evntRecPtr.p->m_request.getUserRef()); - - sendSignal(senderRef, GSN_DROP_EVNT_CONF, signal, - DropEvntConf::SignalLength, JBB); - } - - c_opDropEvent.release(evntRecPtr); -} /** * MODULE: Alter index @@ -9639,7 +7753,7 @@ Dbdict::alterIndex_toDropTc(Signal* signal, OpAlterIndexPtr opPtr) // broken index allowed if force if (! (indexPtr.p->indexLocal & TableRecord::IL_CREATED_TC)) { jam(); - ndbrequire(opPtr.p->m_requestFlag & RequestFlag::RF_FORCE); + ndbassert(opPtr.p->m_requestFlag & RequestFlag::RF_FORCE); alterIndex_sendReply(signal, opPtr, false); return; } @@ -11696,7 +9810,7 @@ Dbdict::alterTrigger_toDropLocal(Signal* signal, OpAlterTriggerPtr opPtr) // broken trigger allowed if force if (! (triggerPtr.p->triggerLocal & TriggerRecord::TL_CREATED_TC)) { jam(); - ndbrequire(opPtr.p->m_requestFlag & RequestFlag::RF_FORCE); + ndbassert(opPtr.p->m_requestFlag & RequestFlag::RF_FORCE); alterTrigger_sendReply(signal, opPtr, false); return; } @@ -11706,6 +9820,7 @@ Dbdict::alterTrigger_toDropLocal(Signal* signal, OpAlterTriggerPtr opPtr) // broken trigger allowed if force if (! 
(triggerPtr.p->triggerLocal & TriggerRecord::TL_CREATED_LQH)) { jam(); + ndbassert(opPtr.p->m_requestFlag & RequestFlag::RF_FORCE); alterTrigger_sendReply(signal, opPtr, false); return; } @@ -11913,6 +10028,275 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask) } } +// DICT lock master + +const Dbdict::DictLockType* +Dbdict::getDictLockType(Uint32 lockType) +{ + static const DictLockType lt[] = { + { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" } + }; + for (int i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) { + if (lt[i].lockType == lockType) + return &lt[i]; + } + return NULL; +} + +void +Dbdict::sendDictLockInfoEvent(Uint32 pollCount) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + unsigned count = 0; + + char queue_buf[100]; + char *p = &queue_buf[0]; + const char *const q = &queue_buf[sizeof(queue_buf)]; + *p = 0; + + while (loopPtr.i != RNIL) { + jam(); + my_snprintf(p, q-p, "%s%u%s", + ++count == 1 ? "" : " ", + (unsigned)refToNode(loopPtr.p->req.userRef), + loopPtr.p->locked ? 
"L" : ""); + p += strlen(p); + c_dictLockQueue.next(loopPtr); + } + + infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s", + (int)c_blockState, + c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(), + c_dictLockPoll, (int)pollCount, queue_buf); +} + +void +Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text) +{ + infoEvent("DICT: %s %u for %s", + text, + (unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text); +} + +void +Dbdict::execDICT_LOCK_REQ(Signal* signal) +{ + jamEntry(); + const DictLockReq* req = (const DictLockReq*)&signal->theData[0]; + + // make sure bad request crashes slave, not master (us) + + if (getOwnNodeId() != c_masterNodeId) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::NotMaster); + return; + } + + const DictLockType* lt = getDictLockType(req->lockType); + if (lt == NULL) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::InvalidLockType); + return; + } + + if (req->userRef != signal->getSendersBlockRef() || + getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::BadUserRef); + return; + } + + if (c_aliveNodes.get(refToNode(req->userRef))) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooLate); + return; + } + + DictLockPtr lockPtr; + if (! c_dictLockQueue.seize(lockPtr)) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooManyRequests); + return; + } + + lockPtr.p->req = *req; + lockPtr.p->locked = false; + lockPtr.p->lt = lt; + + checkDictLockQueue(signal, false); + + if (! lockPtr.p->locked) + sendDictLockInfoEvent(lockPtr, "lock request by node"); +} + +void +Dbdict::checkDictLockQueue(Signal* signal, bool poll) +{ + Uint32 pollCount = ! poll ? 0 : signal->theData[1]; + + DictLockPtr lockPtr; + + do { + if (! 
c_dictLockQueue.first(lockPtr)) { + jam(); + setDictLockPoll(signal, false, pollCount); + return; + } + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + break; + } + + if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) { + jam(); + break; + } + + ndbrequire(c_blockState == BS_IDLE); + lockPtr.p->locked = true; + c_blockState = lockPtr.p->lt->blockState; + sendDictLockConf(signal, lockPtr); + + sendDictLockInfoEvent(lockPtr, "locked by node"); + } while (0); + + // poll while first request is open + // this routine is called again when it is removed for any reason + + bool on = ! lockPtr.p->locked; + setDictLockPoll(signal, on, pollCount); +} + +void +Dbdict::execDICT_UNLOCK_ORD(Signal* signal) +{ + jamEntry(); + const DictUnlockOrd* ord = (const DictUnlockOrd*)&signal->theData[0]; + + DictLockPtr lockPtr; + c_dictLockQueue.getPtr(lockPtr, ord->lockPtr); + ndbrequire(lockPtr.p->lt->lockType == ord->lockType); + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! 
c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + sendDictLockInfoEvent(lockPtr, "unlocked by node"); + } else { + sendDictLockInfoEvent(lockPtr, "lock request removed by node"); + } + + c_dictLockQueue.release(lockPtr); + + checkDictLockQueue(signal, false); +} + +void +Dbdict::sendDictLockConf(Signal* signal, DictLockPtr lockPtr) +{ + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + const DictLockReq& req = lockPtr.p->req; + + conf->userPtr = req.userPtr; + conf->lockType = req.lockType; + conf->lockPtr = lockPtr.i; + + sendSignal(req.userRef, GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); +} + +void +Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode) +{ + DictLockRef* ref = (DictLockRef*)&signal->theData[0]; + + ref->userPtr = req.userPtr; + ref->lockType = req.lockType; + ref->errorCode = errorCode; + + sendSignal(req.userRef, GSN_DICT_LOCK_REF, signal, + DictLockRef::SignalLength, JBB); +} + +// control polling + +void +Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount) +{ + if (on) { + jam(); + signal->theData[0] = ZDICT_LOCK_POLL; + signal->theData[1] = pollCount + 1; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2); + } + + bool change = (c_dictLockPoll != on); + + if (change) { + jam(); + c_dictLockPoll = on; + } + + // avoid too many messages if master is stuck busy (BS_NODE_FAILURE) + bool periodic = + pollCount < 8 || + pollCount < 64 && pollCount % 8 == 0 || + pollCount < 512 && pollCount % 64 == 0 || + pollCount < 4096 && pollCount % 512 == 0 || + pollCount % 4096 == 0; // about every 6 minutes + + if (change || periodic) + sendDictLockInfoEvent(pollCount); +} + +// NF handling + +void +Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + + if (getOwnNodeId() != c_masterNodeId) { + ndbrequire(loopPtr.i == RNIL); + return; + } + + while (loopPtr.i != RNIL) { + jam(); + 
DictLockPtr lockPtr = loopPtr; + c_dictLockQueue.next(loopPtr); + + Uint32 nodeId = refToNode(lockPtr.p->req.userRef); + + if (NodeBitmask::get(theFailedNodes, nodeId)) { + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + + sendDictLockInfoEvent(lockPtr, "remove lock by failed node"); + } else { + sendDictLockInfoEvent(lockPtr, "remove lock request by failed node"); + } + + c_dictLockQueue.release(lockPtr); + } + } + + checkDictLockQueue(signal, false); +} + + /* **************************************************************** */ /* ---------------------------------------------------------------- */ /* MODULE: STORE/RESTORE SCHEMA FILE---------------------- */ @@ -11924,36 +10308,75 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask) /* **************************************************************** */ void -Dbdict::initSchemaFile(SchemaFile * sf, Uint32 fileSz){ - memcpy(sf->Magic, "NDBSCHMA", sizeof(sf->Magic)); - sf->ByteOrder = 0x12345678; - sf->NdbVersion = NDB_VERSION; - sf->FileSize = fileSz; - sf->CheckSum = 0; - - Uint32 headSz = (sizeof(SchemaFile)-sizeof(SchemaFile::TableEntry)); - Uint32 noEntries = (fileSz - headSz) / sizeof(SchemaFile::TableEntry); - Uint32 slack = (fileSz - headSz) - noEntries * sizeof(SchemaFile::TableEntry); - - ndbrequire(noEntries > MAX_TABLES); +Dbdict::initSchemaFile(XSchemaFile * xsf, Uint32 firstPage, Uint32 lastPage, + bool initEntries) +{ + ndbrequire(lastPage <= xsf->noOfPages); + for (Uint32 n = firstPage; n < lastPage; n++) { + SchemaFile * sf = &xsf->schemaPage[n]; + if (initEntries) + memset(sf, 0, NDB_SF_PAGE_SIZE); + + Uint32 ndb_version = NDB_VERSION; + if (ndb_version < NDB_SF_VERSION_5_0_6) + ndb_version = NDB_SF_VERSION_5_0_6; - sf->NoOfTableEntries = noEntries; - memset(sf->TableEntries, 0, 
noEntries*sizeof(SchemaFile::TableEntry)); - memset(&(sf->TableEntries[noEntries]), 0, slack); - computeChecksum(sf); + memcpy(sf->Magic, NDB_SF_MAGIC, sizeof(sf->Magic)); + sf->ByteOrder = 0x12345678; + sf->NdbVersion = ndb_version; + sf->FileSize = xsf->noOfPages * NDB_SF_PAGE_SIZE; + sf->PageNumber = n; + sf->CheckSum = 0; + sf->NoOfTableEntries = NDB_SF_PAGE_ENTRIES; + + computeChecksum(xsf, n); + } } void -Dbdict::computeChecksum(SchemaFile * sf){ +Dbdict::resizeSchemaFile(XSchemaFile * xsf, Uint32 noOfPages) +{ + ndbrequire(noOfPages <= NDB_SF_MAX_PAGES); + if (xsf->noOfPages < noOfPages) { + jam(); + Uint32 firstPage = xsf->noOfPages; + xsf->noOfPages = noOfPages; + initSchemaFile(xsf, 0, firstPage, false); + initSchemaFile(xsf, firstPage, xsf->noOfPages, true); + } + if (xsf->noOfPages > noOfPages) { + jam(); + Uint32 tableId = noOfPages * NDB_SF_PAGE_ENTRIES; + while (tableId < xsf->noOfPages * NDB_SF_PAGE_ENTRIES) { + SchemaFile::TableEntry * te = getTableEntry(xsf, tableId); + if (te->m_tableState != SchemaFile::INIT && + te->m_tableState != SchemaFile::DROP_TABLE_COMMITTED) { + ndbrequire(false); + } + tableId++; + } + xsf->noOfPages = noOfPages; + initSchemaFile(xsf, 0, xsf->noOfPages, false); + } +} + +void +Dbdict::computeChecksum(XSchemaFile * xsf, Uint32 pageNo){ + SchemaFile * sf = &xsf->schemaPage[pageNo]; sf->CheckSum = 0; - sf->CheckSum = computeChecksum((const Uint32*)sf, sf->FileSize/4); + sf->CheckSum = computeChecksum((Uint32*)sf, NDB_SF_PAGE_SIZE_IN_WORDS); } bool -Dbdict::validateChecksum(const SchemaFile * sf){ +Dbdict::validateChecksum(const XSchemaFile * xsf){ - Uint32 c = computeChecksum((const Uint32*)sf, sf->FileSize/4); - return c == 0; + for (Uint32 n = 0; n < xsf->noOfPages; n++) { + SchemaFile * sf = &xsf->schemaPage[n]; + Uint32 c = computeChecksum((Uint32*)sf, NDB_SF_PAGE_SIZE_IN_WORDS); + if ( c != 0) + return false; + } + return true; } Uint32 @@ -11965,11 +10388,14 @@ Dbdict::computeChecksum(const Uint32 * src, Uint32 len){ 
} SchemaFile::TableEntry * -Dbdict::getTableEntry(void * p, Uint32 tableId, bool allowTooBig){ - SchemaFile * sf = (SchemaFile*)p; - - ndbrequire(allowTooBig || tableId < sf->NoOfTableEntries); - return &sf->TableEntries[tableId]; +Dbdict::getTableEntry(XSchemaFile * xsf, Uint32 tableId) +{ + Uint32 n = tableId / NDB_SF_PAGE_ENTRIES; + Uint32 i = tableId % NDB_SF_PAGE_ENTRIES; + ndbrequire(n < xsf->noOfPages); + + SchemaFile * sf = &xsf->schemaPage[n]; + return &sf->TableEntries[i]; } // global metadata support @@ -12074,3 +10500,5 @@ Dbdict::getMetaAttribute(MetaData::Attribute& attr, const MetaData::Table& table new (&attr) MetaData::Attribute(*attrPtr.p); return 0; } + +CArray<KeyDescriptor> g_key_descriptor_pool; diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index bcee4a52b6a..ed8b7e3b822 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -26,6 +26,7 @@ #include <pc.hpp> #include <ArrayList.hpp> #include <DLHashTable.hpp> +#include <DLFifoList.hpp> #include <CArray.hpp> #include <KeyTable2.hpp> #include <SimulatedBlock.hpp> @@ -45,11 +46,10 @@ #include <signaldata/DropIndx.hpp> #include <signaldata/AlterIndx.hpp> #include <signaldata/BuildIndx.hpp> -#include <signaldata/UtilPrepare.hpp> -#include <signaldata/CreateEvnt.hpp> #include <signaldata/CreateTrig.hpp> #include <signaldata/DropTrig.hpp> #include <signaldata/AlterTrig.hpp> +#include <signaldata/DictLock.hpp> #include "SchemaFile.hpp" #include <blocks/mutexes.hpp> #include <SafeCounter.hpp> @@ -63,6 +63,7 @@ /*--------------------------------------------------------------*/ #define ZPACK_TABLE_INTO_PAGES 0 #define ZSEND_GET_TAB_RESPONSE 3 +#define ZDICT_LOCK_POLL 4 /*--------------------------------------------------------------*/ @@ -78,7 +79,8 @@ /*--------------------------------------------------------------*/ // Page constants /*--------------------------------------------------------------*/ -#define 
ZALLOCATE 1 //Variable number of page for NDBFS +#define ZBAT_SCHEMA_FILE 0 //Variable number of page for NDBFS +#define ZBAT_TABLE_FILE 1 //Variable number of page for NDBFS #define ZPAGE_HEADER_SIZE 32 #define ZPOS_PAGE_SIZE 16 #define ZPOS_CHECKSUM 17 @@ -92,7 +94,7 @@ #define ZSIZE_OF_PAGES_IN_WORDS 8192 #define ZLOG_SIZE_OF_PAGES_IN_WORDS 13 #define ZMAX_PAGES_OF_TABLE_DEFINITION 8 -#define ZNUMBER_OF_PAGES (2 * ZMAX_PAGES_OF_TABLE_DEFINITION + 2) +#define ZNUMBER_OF_PAGES (ZMAX_PAGES_OF_TABLE_DEFINITION + 1) #define ZNO_OF_FRAGRECORD 5 /*--------------------------------------------------------------*/ @@ -130,6 +132,10 @@ public: * on disk. Index trigger ids are volatile. */ struct TableRecord : public MetaData::Table { + Uint32 maxRowsLow; + Uint32 maxRowsHigh; + Uint32 minRowsLow; + Uint32 minRowsHigh; /**************************************************** * Support variables for table handling ****************************************************/ @@ -212,7 +218,9 @@ public: IL_CREATED_TC = 1 << 0 // created in TC }; Uint32 indexLocal; - + + Uint32 noOfNullBits; + inline bool equal(TableRecord & rec) const { return strcmp(tableName, rec.tableName) == 0; } @@ -426,6 +434,12 @@ public: typedef Ptr<PageRecord> PageRecordPtr; CArray<PageRecord> c_pageRecordArray; + struct SchemaPageRecord { + Uint32 word[NDB_SF_PAGE_SIZE_IN_WORDS]; + }; + + CArray<SchemaPageRecord> c_schemaPageRecordArray; + /** * A page for create index table signal. 
*/ @@ -501,45 +515,6 @@ private: void execBACKUP_FRAGMENT_REQ(Signal*); - // Util signals used by Event code - void execUTIL_PREPARE_CONF(Signal* signal); - void execUTIL_PREPARE_REF (Signal* signal); - void execUTIL_EXECUTE_CONF(Signal* signal); - void execUTIL_EXECUTE_REF (Signal* signal); - void execUTIL_RELEASE_CONF(Signal* signal); - void execUTIL_RELEASE_REF (Signal* signal); - - - // Event signals from API - void execCREATE_EVNT_REQ (Signal* signal); - void execCREATE_EVNT_CONF(Signal* signal); - void execCREATE_EVNT_REF (Signal* signal); - - void execDROP_EVNT_REQ (Signal* signal); - - void execSUB_START_REQ (Signal* signal); - void execSUB_START_CONF (Signal* signal); - void execSUB_START_REF (Signal* signal); - - void execSUB_STOP_REQ (Signal* signal); - void execSUB_STOP_CONF (Signal* signal); - void execSUB_STOP_REF (Signal* signal); - - // Event signals from SUMA - - void execCREATE_SUBID_CONF(Signal* signal); - void execCREATE_SUBID_REF (Signal* signal); - - void execSUB_CREATE_CONF(Signal* signal); - void execSUB_CREATE_REF (Signal* signal); - - void execSUB_SYNC_CONF(Signal* signal); - void execSUB_SYNC_REF (Signal* signal); - - void execSUB_REMOVE_REQ(Signal* signal); - void execSUB_REMOVE_CONF(Signal* signal); - void execSUB_REMOVE_REF(Signal* signal); - // Trigger signals void execCREATE_TRIG_REQ(Signal* signal); void execCREATE_TRIG_CONF(Signal* signal); @@ -578,6 +553,9 @@ private: void execALTER_TAB_CONF(Signal* signal); bool check_ndb_versions() const; + void execDICT_LOCK_REQ(Signal* signal); + void execDICT_UNLOCK_ORD(Signal* signal); + /* * 2.4 COMMON STORED VARIABLES */ @@ -653,16 +631,20 @@ private: struct ReadSchemaRecord { /** Page Id of schema page */ Uint32 pageId; + /** First page to read */ + Uint32 firstPage; + /** Number of pages to read */ + Uint32 noOfPages; /** State, indicates from where it was called */ enum SchemaReadState { IDLE = 0, - INITIAL_READ = 1 + INITIAL_READ_HEAD = 1, + INITIAL_READ = 2 }; SchemaReadState 
schemaReadState; }; ReadSchemaRecord c_readSchemaRecord; -private: /** * This record stores all the state needed * when a schema file is being written to disk @@ -670,6 +652,12 @@ private: struct WriteSchemaRecord { /** Page Id of schema page */ Uint32 pageId; + /** Rewrite entire file */ + Uint32 newFile; + /** First page to write */ + Uint32 firstPage; + /** Number of pages to write */ + Uint32 noOfPages; /** Schema Files Handled, local state variable */ Uint32 noOfSchemaFilesHandled; @@ -750,21 +738,33 @@ private: * Word 4: Currently zero ****************************************************************************/ struct SchemaRecord { - /** Schema page */ + /** Schema file first page (0) */ Uint32 schemaPage; - /** Old Schema page (used at node restart) */ + /** Old Schema file first page (used at node restart) */ Uint32 oldSchemaPage; Callback m_callback; }; SchemaRecord c_schemaRecord; - void initSchemaFile(SchemaFile *, Uint32 sz); - void computeChecksum(SchemaFile *); - bool validateChecksum(const SchemaFile *); - SchemaFile::TableEntry * getTableEntry(void * buf, Uint32 tableId, - bool allowTooBig = false); + /* + * Schema file, list of schema pages. Use an array until a pool + * exists and NDBFS interface can use it. 
+ */ + struct XSchemaFile { + SchemaFile* schemaPage; + Uint32 noOfPages; + }; + // 0-normal 1-old + XSchemaFile c_schemaFile[2]; + + void initSchemaFile(XSchemaFile *, Uint32 firstPage, Uint32 lastPage, + bool initEntries); + void resizeSchemaFile(XSchemaFile * xsf, Uint32 noOfPages); + void computeChecksum(XSchemaFile *, Uint32 pageNo); + bool validateChecksum(const XSchemaFile *); + SchemaFile::TableEntry * getTableEntry(XSchemaFile *, Uint32 tableId); Uint32 computeChecksum(const Uint32 * src, Uint32 len); @@ -786,12 +786,43 @@ private: // State variables /* ----------------------------------------------------------------------- */ +#ifndef ndb_dbdict_log_block_state enum BlockState { BS_IDLE = 0, BS_CREATE_TAB = 1, BS_BUSY = 2, - BS_NODE_FAILURE = 3 + BS_NODE_FAILURE = 3, + BS_NODE_RESTART = 4 + }; +#else // quick hack to log changes + enum { + BS_IDLE = 0, + BS_CREATE_TAB = 1, + BS_BUSY = 2, + BS_NODE_FAILURE = 3, + BS_NODE_RESTART = 4 }; + struct BlockState; + friend struct BlockState; + struct BlockState { + BlockState() : + m_value(BS_IDLE) { + } + BlockState(int value) : + m_value(value) { + } + operator int() const { + return m_value; + } + BlockState& operator=(const BlockState& bs) { + Dbdict* dict = (Dbdict*)globalData.getBlock(DBDICT); + dict->infoEvent("DICT: bs %d->%d", m_value, bs.m_value); + m_value = bs.m_value; + return *this; + } + int m_value; + }; +#endif BlockState c_blockState; struct PackTable { @@ -1314,119 +1345,6 @@ private: typedef Ptr<OpBuildIndex> OpBuildIndexPtr; /** - * Operation record for Util Signals. 
- */ - struct OpSignalUtil : OpRecordCommon{ - Callback m_callback; - Uint32 m_userData; - }; - typedef Ptr<OpSignalUtil> OpSignalUtilPtr; - - /** - * Operation record for subscribe-start-stop - */ - struct OpSubEvent : OpRecordCommon { - Uint32 m_senderRef; - Uint32 m_senderData; - Uint32 m_errorCode; - RequestTracker m_reqTracker; - }; - typedef Ptr<OpSubEvent> OpSubEventPtr; - - static const Uint32 sysTab_NDBEVENTS_0_szs[]; - - /** - * Operation record for create event. - */ - struct OpCreateEvent : OpRecordCommon { - // original request (event id will be added) - CreateEvntReq m_request; - //AttributeMask m_attrListBitmask; - // AttributeList m_attrList; - sysTab_NDBEVENTS_0 m_eventRec; - // char m_eventName[MAX_TAB_NAME_SIZE]; - // char m_tableName[MAX_TAB_NAME_SIZE]; - - // coordinator DICT - RequestTracker m_reqTracker; - // state info - CreateEvntReq::RequestType m_requestType; - Uint32 m_requestFlag; - // error info - CreateEvntRef::ErrorCode m_errorCode; - Uint32 m_errorLine; - Uint32 m_errorNode; - // ctor - OpCreateEvent() { - memset(&m_request, 0, sizeof(m_request)); - m_requestType = CreateEvntReq::RT_UNDEFINED; - m_requestFlag = 0; - m_errorCode = CreateEvntRef::NoError; - m_errorLine = 0; - m_errorNode = 0; - } - void init(const CreateEvntReq* req, Dbdict* dp) { - m_request = *req; - m_errorCode = CreateEvntRef::NoError; - m_errorLine = 0; - m_errorNode = 0; - m_requestType = req->getRequestType(); - m_requestFlag = req->getRequestFlag(); - } - bool hasError() { - return m_errorCode != CreateEvntRef::NoError; - } - void setError(const CreateEvntRef* ref) { - if (ref != 0 && ! hasError()) { - m_errorCode = ref->getErrorCode(); - m_errorLine = ref->getErrorLine(); - m_errorNode = ref->getErrorNode(); - } - } - - }; - typedef Ptr<OpCreateEvent> OpCreateEventPtr; - - /** - * Operation record for drop event. 
- */ - struct OpDropEvent : OpRecordCommon { - // original request - DropEvntReq m_request; - // char m_eventName[MAX_TAB_NAME_SIZE]; - sysTab_NDBEVENTS_0 m_eventRec; - RequestTracker m_reqTracker; - // error info - DropEvntRef::ErrorCode m_errorCode; - Uint32 m_errorLine; - Uint32 m_errorNode; - // ctor - OpDropEvent() { - memset(&m_request, 0, sizeof(m_request)); - m_errorCode = DropEvntRef::NoError; - m_errorLine = 0; - m_errorNode = 0; - } - void init(const DropEvntReq* req) { - m_request = *req; - m_errorCode = DropEvntRef::NoError; - m_errorLine = 0; - m_errorNode = 0; - } - bool hasError() { - return m_errorCode != DropEvntRef::NoError; - } - void setError(const DropEvntRef* ref) { - if (ref != 0 && ! hasError()) { - m_errorCode = ref->getErrorCode(); - m_errorLine = ref->getErrorLine(); - m_errorNode = ref->getErrorNode(); - } - } - }; - typedef Ptr<OpDropEvent> OpDropEventPtr; - - /** * Operation record for create trigger. */ struct OpCreateTrigger : OpRecordCommon { @@ -1646,10 +1564,6 @@ public: STATIC_CONST( opDropIndexSize = sizeof(OpDropIndex) ); STATIC_CONST( opAlterIndexSize = sizeof(OpAlterIndex) ); STATIC_CONST( opBuildIndexSize = sizeof(OpBuildIndex) ); - STATIC_CONST( opCreateEventSize = sizeof(OpCreateEvent) ); - STATIC_CONST( opSubEventSize = sizeof(OpSubEvent) ); - STATIC_CONST( opDropEventSize = sizeof(OpDropEvent) ); - STATIC_CONST( opSignalUtilSize = sizeof(OpSignalUtil) ); STATIC_CONST( opCreateTriggerSize = sizeof(OpCreateTrigger) ); STATIC_CONST( opDropTriggerSize = sizeof(OpDropTrigger) ); STATIC_CONST( opAlterTriggerSize = sizeof(OpAlterTrigger) ); @@ -1660,10 +1574,6 @@ private: Uint32 u_opDropTable [PTR_ALIGN(opDropTableSize)]; Uint32 u_opCreateIndex [PTR_ALIGN(opCreateIndexSize)]; Uint32 u_opDropIndex [PTR_ALIGN(opDropIndexSize)]; - Uint32 u_opCreateEvent [PTR_ALIGN(opCreateEventSize)]; - Uint32 u_opSubEvent [PTR_ALIGN(opSubEventSize)]; - Uint32 u_opDropEvent [PTR_ALIGN(opDropEventSize)]; - Uint32 u_opSignalUtil 
[PTR_ALIGN(opSignalUtilSize)]; Uint32 u_opAlterIndex [PTR_ALIGN(opAlterIndexSize)]; Uint32 u_opBuildIndex [PTR_ALIGN(opBuildIndexSize)]; Uint32 u_opCreateTrigger[PTR_ALIGN(opCreateTriggerSize)]; @@ -1680,10 +1590,6 @@ private: KeyTable2<OpDropIndex, OpRecordUnion> c_opDropIndex; KeyTable2<OpAlterIndex, OpRecordUnion> c_opAlterIndex; KeyTable2<OpBuildIndex, OpRecordUnion> c_opBuildIndex; - KeyTable2<OpCreateEvent, OpRecordUnion> c_opCreateEvent; - KeyTable2<OpSubEvent, OpRecordUnion> c_opSubEvent; - KeyTable2<OpDropEvent, OpRecordUnion> c_opDropEvent; - KeyTable2<OpSignalUtil, OpRecordUnion> c_opSignalUtil; KeyTable2<OpCreateTrigger, OpRecordUnion> c_opCreateTrigger; KeyTable2<OpDropTrigger, OpRecordUnion> c_opDropTrigger; KeyTable2<OpAlterTrigger, OpRecordUnion> c_opAlterTrigger; @@ -1691,6 +1597,70 @@ private: // Unique key for operation XXX move to some system table Uint32 c_opRecordSequence; + /* + * Master DICT can be locked in 2 mutually exclusive ways: + * + * 1) for schema ops, via operation records + * 2) against schema ops, via a lock queue + * + * Current use of 2) is by a starting node, to prevent schema ops + * until started. The ops are refused (BlockState != BS_IDLE), + * not queued. + * + * Master failure is not handled, in node start case the starting + * node will crash too anyway. Use lock table in future.. + * + * The lock queue is "serial" but other behaviour is possible + * by checking lock types e.g. to allow parallel node starts. + * + * Checking release of last op record is not convenient with + * current structure (5.0). Instead we poll via continueB. 
+ * + * XXX only table ops check BlockState + */ + struct DictLockType; + friend struct DictLockType; + + struct DictLockType { + DictLockReq::LockType lockType; + BlockState blockState; + const char* text; + }; + + struct DictLockRecord; + friend struct DictLockRecord; + + struct DictLockRecord { + DictLockReq req; + const DictLockType* lt; + bool locked; + union { + Uint32 nextPool; + Uint32 nextList; + }; + Uint32 prevList; + }; + + typedef Ptr<DictLockRecord> DictLockPtr; + ArrayPool<DictLockRecord> c_dictLockPool; + DLFifoList<DictLockRecord> c_dictLockQueue; + bool c_dictLockPoll; + + static const DictLockType* getDictLockType(Uint32 lockType); + void sendDictLockInfoEvent(Uint32 pollCount); + void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text); + + void checkDictLockQueue(Signal* signal, bool poll); + void sendDictLockConf(Signal* signal, DictLockPtr lockPtr); + void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode); + + // control polling i.e. continueB loop + void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount); + + // NF handling + void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes); + + // Statement blocks /* ------------------------------------------------------------ */ @@ -1713,7 +1683,8 @@ private: bool getNewAttributeRecord(TableRecordPtr tablePtr, AttributeRecordPtr & attrPtr); void packTableIntoPages(Signal* signal, Uint32 tableId, Uint32 pageId); - void packTableIntoPagesImpl(SimpleProperties::Writer &, TableRecordPtr); + void packTableIntoPagesImpl(SimpleProperties::Writer &, TableRecordPtr, + Signal* signal= 0); void sendGET_TABINFOREQ(Signal* signal, Uint32 tableId); @@ -1761,7 +1732,8 @@ private: void openSchemaFile(Signal* signal, Uint32 fileNo, Uint32 fsPtr, - bool writeFlag); + bool writeFlag, + bool newFile); void writeSchemaFile(Signal* signal, Uint32 filePtr, Uint32 fsPtr); void writeSchemaConf(Signal* signal, FsConnectRecordPtr fsPtr); @@ -1803,6 +1775,7 @@ private: void 
readSchemaRef(Signal* signal, FsConnectRecordPtr fsPtr); void closeReadSchemaConf(Signal* signal, FsConnectRecordPtr fsPtr); + bool convertSchemaFileTo_5_0_6(XSchemaFile*); /* ------------------------------------------------------------ */ // Get table definitions @@ -1880,101 +1853,6 @@ private: void buildIndex_sendSlaveReq(Signal* signal, OpBuildIndexPtr opPtr); void buildIndex_sendReply(Signal* signal, OpBuildIndexPtr opPtr, bool); - // Events - void - createEventUTIL_PREPARE(Signal* signal, - Uint32 callbackData, - Uint32 returnCode); - void - createEventUTIL_EXECUTE(Signal *signal, - Uint32 callbackData, - Uint32 returnCode); - void - dropEventUTIL_PREPARE_READ(Signal* signal, - Uint32 callbackData, - Uint32 returnCode); - void - dropEventUTIL_EXECUTE_READ(Signal* signal, - Uint32 callbackData, - Uint32 returnCode); - void - dropEventUTIL_PREPARE_DELETE(Signal* signal, - Uint32 callbackData, - Uint32 returnCode); - void - dropEventUTIL_EXECUTE_DELETE(Signal *signal, - Uint32 callbackData, - Uint32 returnCode); - void - dropEventUtilPrepareRef(Signal* signal, - Uint32 callbackData, - Uint32 returnCode); - void - dropEventUtilExecuteRef(Signal* signal, - Uint32 callbackData, - Uint32 returnCode); - int - sendSignalUtilReq(Callback *c, - BlockReference ref, - GlobalSignalNumber gsn, - Signal* signal, - Uint32 length, - JobBufferLevel jbuf, - LinearSectionPtr ptr[3], - Uint32 noOfSections); - int - recvSignalUtilReq(Signal* signal, Uint32 returnCode); - - void completeSubStartReq(Signal* signal, Uint32 ptrI, Uint32 returnCode); - void completeSubStopReq(Signal* signal, Uint32 ptrI, Uint32 returnCode); - void completeSubRemoveReq(Signal* signal, Uint32 ptrI, Uint32 returnCode); - - void dropEvent_sendReply(Signal* signal, - OpDropEventPtr evntRecPtr); - - void createEvent_RT_USER_CREATE(Signal* signal, OpCreateEventPtr evntRecPtr); - void createEventComplete_RT_USER_CREATE(Signal* signal, - OpCreateEventPtr evntRecPtr); - void createEvent_RT_USER_GET(Signal* 
signal, OpCreateEventPtr evntRecPtr); - void createEventComplete_RT_USER_GET(Signal* signal, OpCreateEventPtr evntRecPtr); - - void createEvent_RT_DICT_AFTER_GET(Signal* signal, OpCreateEventPtr evntRecPtr); - - void createEvent_nodeFailCallback(Signal* signal, Uint32 eventRecPtrI, - Uint32 returnCode); - void createEvent_sendReply(Signal* signal, OpCreateEventPtr evntRecPtr, - LinearSectionPtr *ptr = NULL, int noLSP = 0); - - void prepareTransactionEventSysTable (Callback *c, - Signal* signal, - Uint32 senderData, - UtilPrepareReq::OperationTypeValue prepReq); - void prepareUtilTransaction(Callback *c, - Signal* signal, - Uint32 senderData, - Uint32 tableId, - const char *tableName, - UtilPrepareReq::OperationTypeValue prepReq, - Uint32 noAttr, - Uint32 attrIds[], - const char *attrNames[]); - - void executeTransEventSysTable(Callback *c, - Signal *signal, - const Uint32 ptrI, - sysTab_NDBEVENTS_0& m_eventRec, - const Uint32 prepareId, - UtilPrepareReq::OperationTypeValue prepReq); - void executeTransaction(Callback *c, - Signal* signal, - Uint32 senderData, - Uint32 prepareId, - Uint32 noAttr, - LinearSectionPtr headerPtr, - LinearSectionPtr dataPtr); - - void parseReadEventSys(Signal *signal, sysTab_NDBEVENTS_0& m_eventRec); - // create trigger void createTrigger_recvReply(Signal* signal, const CreateTrigConf* conf, const CreateTrigRef* ref); diff --git a/ndb/src/kernel/blocks/dbdict/DictLock.txt b/ndb/src/kernel/blocks/dbdict/DictLock.txt new file mode 100644 index 00000000000..72e23ed15a5 --- /dev/null +++ b/ndb/src/kernel/blocks/dbdict/DictLock.txt @@ -0,0 +1,98 @@ +Lock master DICT against schema operations + +Implementation +-------------- + +[ see comments in Dbdict.hpp ] + +Use case: Node startup INR / NR +------------------------------- + +Master DICT (like any block) keeps list of alive nodes (c_aliveNodes). +These are participants in schema ops. 
+ +(1) c_aliveNodes is initialized when DICT starts + in sp3 in READ_NODESCONF from CNTR + +(2) when slave node fails (in any sp of the slave node) + it is removed from c_aliveNodes in NODE_FAILREP + +(3) when slave starts, it is added to c_aliveNodes + in sp4 of the starting node in INCL_NODEREQ + +Slave DIH locks master DICT in sp2 and releases the lock when started. +Based on the constraints: + +- the lock is taken when master DICT is known + DIH reads this in sp2 in READ_NODESCONF + +- the lock is taken before (3) + +- the lock is taken before copying starts and held until it is done + in sp4 DIH meta, DICT meta, tuple data + +- on INR in sp2 in START_PERMREQ the LCP info of the slave is erased + in all DIH in invalidateNodeLCP() - not safe under schema ops + +Signals: + +All but DICT_LOCK are standard v5.0 signals. +s=starting node, m=master, a=all participants, l=local block. + +* sp2 - DICT_LOCK and START_PERM + +DIH/s + DICT_LOCK_REQ + DICT/m + DICT_LOCK_CONF +DIH/s + START_PERMREQ + DIH/m + START_INFOREQ + DIH/a + invalidateNodeLCP() if INR + DIH/a + START_INFOCONF + DIH/m + START_PERMCONF +DIH/s + +* sp4 - START_ME (copy metadata, no changes) + +DIH/s + START_MEREQ + DIH/m + COPY_TABREQ + DIH/s + COPY_TABCONF + DIH/m + DICTSTARTREQ + DICT/s + GET_SCHEMA_INFOREQ + DICT/m + SCHEMA_INFO + DICT/s + DICTSTARTCONF + DIH/m + INCL_NODEREQ + DIH/a + INCL_NODEREQ + ANY/l + INCL_NODECONF + DIH/a + INCL_NODECONF + DIH/m + START_MECONF +DIH/s + +* (copy data, omitted) + +* SL_STARTED - release DICT lock + +CNTR/s + NODE_START_REP + DIH/s + DICT_UNLOCK_ORD + DICT/m + +# vim: set et sw=4: diff --git a/ndb/src/kernel/blocks/dbdict/SchemaFile.hpp b/ndb/src/kernel/blocks/dbdict/SchemaFile.hpp index 7c3223d3d14..0226991a073 100644 --- a/ndb/src/kernel/blocks/dbdict/SchemaFile.hpp +++ b/ndb/src/kernel/blocks/dbdict/SchemaFile.hpp @@ -18,16 +18,35 @@ #define DBDICT_SCHEMA_FILE_HPP #include <ndb_types.h> +#include <ndb_version.h> #include <string.h> +#define NDB_SF_MAGIC 
"NDBSCHMA" + +// page size 4k +#define NDB_SF_PAGE_SIZE_IN_WORDS_LOG2 10 +#define NDB_SF_PAGE_SIZE_IN_WORDS (1 << NDB_SF_PAGE_SIZE_IN_WORDS_LOG2) +#define NDB_SF_PAGE_SIZE (NDB_SF_PAGE_SIZE_IN_WORDS << 2) + +// 4k = (1 + 127) * 32 +#define NDB_SF_PAGE_ENTRIES 127 + +// 160 pages = 20320 objects +#define NDB_SF_MAX_PAGES 160 + +// versions where format changed +#define NDB_SF_VERSION_5_0_6 MAKE_VERSION(5, 0, 6) + +// One page in schema file. struct SchemaFile { + // header size 32 bytes char Magic[8]; Uint32 ByteOrder; Uint32 NdbVersion; Uint32 FileSize; // In bytes - Uint32 Unused; - - Uint32 CheckSum; + Uint32 PageNumber; + Uint32 CheckSum; // Of this page + Uint32 NoOfTableEntries; // On this page (NDB_SF_PAGE_ENTRIES) enum TableState { INIT = 0, @@ -38,20 +57,33 @@ struct SchemaFile { ALTER_TABLE_COMMITTED = 5 }; + // entry size 32 bytes struct TableEntry { Uint32 m_tableState; Uint32 m_tableVersion; Uint32 m_tableType; - Uint32 m_noOfPages; + Uint32 m_info_words; Uint32 m_gcp; + Uint32 m_unused[3]; bool operator==(const TableEntry& o) const { return memcmp(this, &o, sizeof(* this))== 0; } }; + + // pre-5.0.6 + struct TableEntry_old { + Uint32 m_tableState; + Uint32 m_tableVersion; + Uint32 m_tableType; + Uint32 m_noOfPages; + Uint32 m_gcp; + }; - Uint32 NoOfTableEntries; - TableEntry TableEntries[1]; + union { + TableEntry TableEntries[NDB_SF_PAGE_ENTRIES]; + TableEntry_old TableEntries_old[1]; + }; }; #endif diff --git a/ndb/src/kernel/blocks/dbdict/printSchemaFile.cpp b/ndb/src/kernel/blocks/dbdict/printSchemaFile.cpp index a8b84298ebe..f73654fd9d5 100644 --- a/ndb/src/kernel/blocks/dbdict/printSchemaFile.cpp +++ b/ndb/src/kernel/blocks/dbdict/printSchemaFile.cpp @@ -16,19 +16,33 @@ #include <ndb_global.h> +#include <ndb_version.h> #include <NdbMain.h> #include <NdbOut.hpp> #include <SchemaFile.hpp> -void -usage(const char * prg){ - ndbout << "Usage " << prg - << " P0.SchemaLog" << endl; +static const char* progname = 0; +static bool allflag = false; +static 
bool checkonly = false; +static bool equalcontents = false; +static bool okquiet = false; + +static void +usage() +{ + ndbout + << "Usage: " << progname << " [-aceq]" << " file ..." << endl + << "-a print also unused slots" << endl + << "-c check only (return status 1 on error)" << endl + << "-e check also that the files have identical contents" << endl + << "-q no output if file is ok" << endl + << "Example: " << progname << " -ceq ndb_*_fs/D[12]/DBDICT/P0.SchemaLog" << endl; } -void -fill(const char * buf, int mod){ +static void +fill(const char * buf, int mod) +{ int len = strlen(buf)+1; ndbout << buf << " "; while((len % mod) != 0){ @@ -37,68 +51,222 @@ fill(const char * buf, int mod){ } } -void -print(const char * filename, const SchemaFile * file){ - ndbout << "----- Schemafile: " << filename << " -----" << endl; - ndbout_c("Magic: %.*s ByteOrder: %.8x NdbVersion: %d FileSize: %d", - sizeof(file->Magic), file->Magic, - file->ByteOrder, - file->NdbVersion, - file->FileSize); - - for(Uint32 i = 0; i<file->NoOfTableEntries; i++){ - SchemaFile::TableEntry te = file->TableEntries[i]; - if(te.m_tableState != SchemaFile::INIT){ - ndbout << "Table " << i << ": State = " << te.m_tableState - << " version = " << te.m_tableVersion - << " type = " << te.m_tableType - << " noOfPages = " << te.m_noOfPages - << " gcp: " << te.m_gcp << endl; - } +static const char* +version(Uint32 v) +{ + static char buf[40]; + sprintf(buf, "%d.%d.%d", v >> 16, (v >> 8) & 0xFF, v & 0xFF); + return buf; +} + +static int +print_head(const char * filename, const SchemaFile * sf) +{ + int retcode = 0; + + if (! 
checkonly) { + ndbout << "----- Schemafile: " << filename << " -----" << endl; + /* "%.*s" consumes an int precision argument, so cast the size_t from sizeof */ + ndbout_c("Magic: %.*s ByteOrder: %.8x NdbVersion: %s FileSize: %d", + (int) sizeof(sf->Magic), + sf->Magic, + sf->ByteOrder, + version(sf->NdbVersion), + sf->FileSize); } + + /* compare all 8 magic bytes: the "!= 0" test applies to memcmp's result, not to its length argument */ + if (memcmp(sf->Magic, "NDBSCHMA", sizeof(sf->Magic)) != 0) { + ndbout << filename << ": invalid header magic" << endl; + retcode = 1; + } + + if ((sf->NdbVersion >> 16) < 4 || (sf->NdbVersion >> 16) > 9) { + ndbout << filename << ": impossible version " << hex << sf->NdbVersion << endl; + retcode = 1; + } + + return retcode; } -NDB_COMMAND(printSchemafile, - "printSchemafile", "printSchemafile", "Prints a schemafile", 16384){ - if(argc < 2){ - usage(argv[0]); - return 0; +static int +print_old(const char * filename, const SchemaFile * sf, Uint32 sz) +{ + int retcode = 0; + + if (print_head(filename, sf) != 0) + retcode = 1; + + for (Uint32 i = 0; i < sf->NoOfTableEntries; i++) { + SchemaFile::TableEntry_old te = sf->TableEntries_old[i]; + if (allflag || + (te.m_tableState != SchemaFile::INIT && + te.m_tableState != SchemaFile::DROP_TABLE_COMMITTED)) { + if (! 
checkonly) + ndbout << "Table " << i << ":" + << " State = " << te.m_tableState + << " version = " << te.m_tableVersion + << " type = " << te.m_tableType + << " noOfPages = " << te.m_noOfPages + << " gcp: " << te.m_gcp << endl; + } } + return retcode; +} + +static int +print(const char * filename, const SchemaFile * xsf, Uint32 sz) +{ + int retcode = 0; - const char * filename = argv[1]; + if (print_head(filename, xsf) != 0) + retcode = 1; - struct stat sbuf; - const int res = stat(filename, &sbuf); - if(res != 0){ - ndbout << "Could not find file: \"" << filename << "\"" << endl; - return 0; + assert(sizeof(SchemaFile) == NDB_SF_PAGE_SIZE); + if (xsf->FileSize != sz || xsf->FileSize % NDB_SF_PAGE_SIZE != 0) { + ndbout << filename << ": invalid FileSize " << xsf->FileSize << endl; + retcode = 1; } - const Uint32 bytes = sbuf.st_size; - - Uint32 * buf = new Uint32[bytes/4+1]; - - FILE * f = fopen(filename, "rb"); - if(f == 0){ - ndbout << "Failed to open file" << endl; - delete [] buf; - return 0; + /* FileSize is read from the file and may exceed the sz bytes actually loaded; walk only the pages that lie inside the buffer */ + Uint32 noOfBytes = xsf->FileSize < sz ? xsf->FileSize : sz; + Uint32 noOfPages = noOfBytes / NDB_SF_PAGE_SIZE; + for (Uint32 n = 0; n < noOfPages; n++) { + if (! 
checkonly) { + ndbout << "----- Page: " << n << " (" << noOfPages << ") -----" << endl; + } + const SchemaFile * sf = &xsf[n]; + if (memcmp(sf->Magic, xsf->Magic, sizeof(sf->Magic)) != 0) { + ndbout << filename << ": page " << n << " invalid magic" << endl; + retcode = 1; + } + if (sf->FileSize != xsf->FileSize) { + ndbout << filename << ": page " << n << " FileSize changed to " << sf->FileSize << "!=" << xsf->FileSize << endl; + retcode = 1; + } + Uint32 cs = 0; + for (Uint32 j = 0; j < NDB_SF_PAGE_SIZE_IN_WORDS; j++) + cs ^= ((const Uint32*)sf)[j]; + if (cs != 0) { + ndbout << filename << ": page " << n << " invalid CheckSum" << endl; + retcode = 1; + } + if (sf->NoOfTableEntries != NDB_SF_PAGE_ENTRIES) { + ndbout << filename << ": page " << n << " invalid NoOfTableEntries " << sf->NoOfTableEntries << endl; + retcode = 1; + } + for (Uint32 i = 0; i < NDB_SF_PAGE_ENTRIES; i++) { + SchemaFile::TableEntry te = sf->TableEntries[i]; + Uint32 j = n * NDB_SF_PAGE_ENTRIES + i; + if (allflag || + (te.m_tableState != SchemaFile::INIT && + te.m_tableState != SchemaFile::DROP_TABLE_COMMITTED)) { + if (! 
checkonly) + ndbout << "Table " << j << ":" + << " State = " << te.m_tableState + << " version = " << te.m_tableVersion + << " type = " << te.m_tableType + << " noOfWords = " << te.m_info_words + << " gcp: " << te.m_gcp << endl; + } + if (te.m_unused[0] != 0 || te.m_unused[1] != 0 || te.m_unused[2] != 0) { + ndbout << filename << ": entry " << j << " garbage in m_unused[3]" << endl; + retcode = 1; + } + } } - Uint32 sz = fread(buf, 1, bytes, f); - fclose(f); - if(sz != bytes){ - ndbout << "Failure while reading file" << endl; - delete [] buf; - return 0; + + return retcode; +} + +NDB_COMMAND(printSchemafile, + "printSchemafile", "printSchemafile", "Prints a schemafile", 16384) +{ + progname = argv[0]; + int exitcode = 0; + + while (argc > 1 && argv[1][0] == '-') { + if (strchr(argv[1], 'a') != 0) + allflag = true; + if (strchr(argv[1], 'c') != 0) + checkonly = true; + if (strchr(argv[1], 'e') != 0) + equalcontents = true; + if (strchr(argv[1], 'q') != 0) + okquiet = true; + if (strchr(argv[1], 'h') != 0 || strchr(argv[1], '?') != 0) { + usage(); + return 0; + } + argc--, argv++; } - - print(filename, (SchemaFile *)&buf[0]); - Uint32 chk = 0, i; - for (i = 0; i < bytes/4; i++) - chk ^= buf[i]; - if (chk != 0) - ndbout << "Invalid checksum!" 
<< endl; + const char * prevfilename = 0; + Uint32 * prevbuf = 0; + Uint32 prevbytes = 0; + + while (argc > 1) { + const char * filename = argv[1]; + argc--, argv++; + + struct stat sbuf; + const int res = stat(filename, &sbuf); + if (res != 0) { + ndbout << filename << ": not found errno=" << errno << endl; + exitcode = 1; + continue; + } + const Uint32 bytes = sbuf.st_size; + + Uint32 * buf = new Uint32[bytes/4+1]; + + FILE * f = fopen(filename, "rb"); + if (f == 0) { + ndbout << filename << ": open failed errno=" << errno << endl; + delete [] buf; + exitcode = 1; + continue; + } + Uint32 sz = fread(buf, 1, bytes, f); + fclose(f); + if (sz != bytes) { + ndbout << filename << ": read failed errno=" << errno << endl; + delete [] buf; + exitcode = 1; + continue; + } + + if (sz < 32) { + ndbout << filename << ": too short (no header)" << endl; + delete [] buf; + exitcode = 1; + continue; + } + + SchemaFile* sf = (SchemaFile *)&buf[0]; + int ret; + if (sf->NdbVersion < NDB_SF_VERSION_5_0_6) + ret = print_old(filename, sf, sz); + else + ret = print(filename, sf, sz); + + if (ret != 0) { + ndbout << filename << ": check failed" + << " version=" << version(sf->NdbVersion) << endl; + exitcode = 1; + } else if (! 
okquiet) { + ndbout << filename << ": ok" + << " version=" << version(sf->NdbVersion) << endl; + } + + if (equalcontents && prevfilename != 0) { + if (prevbytes != bytes || memcmp(prevbuf, buf, bytes) != 0) { + ndbout << filename << ": differs from " << prevfilename << endl; + exitcode = 1; + } + } + + prevfilename = filename; + delete [] prevbuf; + prevbuf = buf; + prevbytes = bytes; + } - delete [] buf; - return 0; + delete [] prevbuf; + return exitcode; } diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index 78acf1ffd19..5c2cfac5eb1 100644 --- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -718,6 +718,9 @@ private: void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr); void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId); + void execDICT_LOCK_CONF(Signal* signal); + void execDICT_LOCK_REF(Signal* signal); + // Statement blocks //------------------------------------ // Methods that send signals @@ -935,6 +938,7 @@ private: void initialStartCompletedLab(Signal *); void allNodesLcpCompletedLab(Signal *); void nodeRestartPh2Lab(Signal *); + void nodeRestartPh2Lab2(Signal *); void initGciFilesLab(Signal *); void dictStartConfLab(Signal *); void nodeDictStartConfLab(Signal *); @@ -1594,6 +1598,33 @@ private: * Reply from nodeId */ void startInfoReply(Signal *, Uint32 nodeId); + + // DIH specifics for execNODE_START_REP (sendDictUnlockOrd) + void exec_node_start_rep(Signal* signal); + + /* + * Lock master DICT. Only current use is by starting node + * during NR. A pool of slave records is convenient anyway. 
+ */ + struct DictLockSlaveRecord { + Uint32 lockPtr; + Uint32 lockType; + bool locked; + Callback callback; + Uint32 nextPool; + }; + + typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr; + ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool; + + // slave + void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c); + void recvDictLockConf(Signal* signal); + void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI); + + // NR + Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR + void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret); }; #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32) diff --git a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index 1c14163fe76..2b878034258 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -66,11 +66,15 @@ void Dbdih::initData() waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE); waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE); + c_dictLockSlavePool.setSize(1); // assert single usage + c_dictLockSlavePtrI_nodeRestart = RNIL; + cgcpOrderBlocked = 0; c_lcpState.ctcCounter = 0; cwaitLcpSr = false; c_blockCommit = false; c_blockCommitNo = 1; + cntrlblockref = RNIL; }//Dbdih::initData() void Dbdih::initRecords() @@ -262,8 +266,21 @@ Dbdih::Dbdih(const class Configuration & config): addRecSignal(GSN_CREATE_FRAGMENTATION_REQ, &Dbdih::execCREATE_FRAGMENTATION_REQ); - - initData(); + + addRecSignal(GSN_DICT_LOCK_CONF, &Dbdih::execDICT_LOCK_CONF); + addRecSignal(GSN_DICT_LOCK_REF, &Dbdih::execDICT_LOCK_REF); + + apiConnectRecord = 0; + connectRecord = 0; + fileRecord = 0; + fragmentstore = 0; + pageRecord = 0; + replicaRecord = 0; + tabRecord = 0; + createReplicaRecord = 0; + nodeGroupRecord = 0; + nodeRecord = 0; + takeOverRecord = 0; }//Dbdih::Dbdih() Dbdih::~Dbdih() diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 65c864bd853..4ca4e15111c 100644 --- 
a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -67,6 +67,7 @@ #include <signaldata/CreateFragmentation.hpp> #include <signaldata/LqhFrag.hpp> #include <signaldata/FsOpenReq.hpp> +#include <signaldata/DictLock.hpp> #include <DebuggerNames.hpp> #include <EventLogger.hpp> @@ -247,7 +248,7 @@ void Dbdih::sendSTART_RECREQ(Signal* signal, Uint32 nodeId) req->newestGci = SYSFILE->newestRestorableGCI; sendSignal(ref, GSN_START_RECREQ, signal, StartRecReq::SignalLength, JBB); - signal->theData[0] = EventReport::StartREDOLog; + signal->theData[0] = NDB_LE_StartREDOLog; signal->theData[1] = nodeId; signal->theData[2] = SYSFILE->keepGCI; signal->theData[3] = SYSFILE->lastCompletedGCI[nodeId]; @@ -544,7 +545,7 @@ void Dbdih::execCONTINUEB(Signal* signal) break; case DihContinueB::ZSTART_PERMREQ_AGAIN: jam(); - nodeRestartPh2Lab(signal); + nodeRestartPh2Lab2(signal); return; break; case DihContinueB::SwitchReplica: @@ -1053,17 +1054,25 @@ void Dbdih::execREAD_CONFIG_REQ(Signal* signal) const ndb_mgm_configuration_iterator * p = theConfiguration.getOwnConfigIterator(); - ndbrequire(p != 0); - - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT, - &capiConnectFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_CONNECT,&cconnectFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT, - &cfragstoreFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS, - &creplicaFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize)) - cfileFileSize = (2 * ctabFileSize) + 2; + ndbrequireErr(p != 0, NDBD_EXIT_INVALID_CONFIG); + + initData(); + + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT, + &capiConnectFileSize), + NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_CONNECT, + &cconnectFileSize), + NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT, + &cfragstoreFileSize), 
+ NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS, + &creplicaFileSize), + NDBD_EXIT_INVALID_CONFIG); + ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize), + NDBD_EXIT_INVALID_CONFIG); + cfileFileSize = (2 * ctabFileSize) + 2; initRecords(); initialiseRecordsLab(signal, 0, ref, senderData); return; @@ -1276,6 +1285,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) case NodeState::ST_INITIAL_NODE_RESTART: case NodeState::ST_NODE_RESTART: jam(); + /*********************************************************************** * When starting nodes while system is operational we must be controlled * by the master since only one node restart is allowed at a time. @@ -1286,7 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) req->startingRef = reference(); req->startingVersion = 0; // Obsolete sendSignal(cmasterdihref, GSN_START_MEREQ, signal, - StartMeReq::SignalLength, JBB); + StartMeReq::SignalLength, JBB); return; } ndbrequire(false); @@ -1354,6 +1364,27 @@ void Dbdih::execNDB_STTOR(Signal* signal) }//Dbdih::execNDB_STTOR() void +Dbdih::exec_node_start_rep(Signal* signal) +{ + /* + * Send DICT_UNLOCK_ORD when this node is SL_STARTED. + * + * Sending it before (sp 7) conflicts with code which assumes + * SL_STARTING means we are in copy phase of NR. + * + * NodeState::starting.restartType is not supposed to be used + * when SL_STARTED. Also it seems NODE_START_REP can arrive twice. + * + * For these reasons there are no consistency checks and + * we rely on c_dictLockSlavePtrI_nodeRestart alone. + */ + if (c_dictLockSlavePtrI_nodeRestart != RNIL) { + sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart); + c_dictLockSlavePtrI_nodeRestart = RNIL; + } +} + +void Dbdih::createMutexes(Signal * signal, Uint32 count){ Callback c = { safe_cast(&Dbdih::createMutex_done), count }; @@ -1494,9 +1525,7 @@ void Dbdih::execREAD_NODESCONF(Signal* signal) "Illegal configuration change." 
" Initial start needs to be performed " " when changing no of storage nodes (node %d)", i); - progError(__LINE__, - ERR_INVALID_CONFIG, - buf); + progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf); } } @@ -1558,6 +1587,35 @@ void Dbdih::execREAD_NODESCONF(Signal* signal) /*---------------------------------------------------------------------------*/ void Dbdih::nodeRestartPh2Lab(Signal* signal) { + /* + * Lock master DICT to avoid metadata operations during INR/NR. + * Done just before START_PERMREQ. + * + * It would be more elegant to do this just before START_MEREQ. + * The problem is, on INR we end up in massive invalidateNodeLCP + * which is not fully protected against metadata ops. + */ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + + // check that we are not yet taking part in schema ops + CRASH_INSERTION(7174); + + Uint32 lockType = DictLockReq::NodeRestartLock; + Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 }; + sendDictLockReq(signal, lockType, c); +} + +void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret) +{ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + ndbrequire(data != RNIL); + c_dictLockSlavePtrI_nodeRestart = data; + + nodeRestartPh2Lab2(signal); +} + +void Dbdih::nodeRestartPh2Lab2(Signal* signal) +{ /*------------------------------------------------------------------------*/ // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY // RUNNING SYSTEM. 
@@ -1568,7 +1626,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal) req->nodeId = cownNodeId; req->startType = cstarttype; sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB); -}//Dbdih::nodeRestartPh2Lab() +} void Dbdih::execSTART_PERMCONF(Signal* signal) { @@ -1604,7 +1662,7 @@ void Dbdih::execSTART_PERMREF(Signal* signal) " with --initial as partial start has been performed" " and this filesystem is unusable"); progError(__LINE__, - ERR_SR_RESTARTCONFLICT, + NDBD_EXIT_SR_RESTARTCONFLICT, buf); ndbrequire(false); } @@ -1690,12 +1748,12 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) const BlockReference retRef = req->blockRef; const Uint32 nodeId = req->nodeId; const Uint32 typeStart = req->startType; - CRASH_INSERTION(7122); ndbrequire(isMaster()); ndbrequire(refToNode(retRef) == nodeId); if ((c_nodeStartMaster.activeState) || - (c_nodeStartMaster.wait != ZFALSE)) { + (c_nodeStartMaster.wait != ZFALSE) || + ERROR_INSERTED_CLEAR(7175)) { jam(); signal->theData[0] = nodeId; signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; @@ -1868,7 +1926,7 @@ void Dbdih::nodeDictStartConfLab(Signal* signal) /*-----------------------------------------------------------------*/ // Report that node restart has completed copy of dictionary. /*-----------------------------------------------------------------*/ - signal->theData[0] = EventReport::NR_CopyDict; + signal->theData[0] = NDB_LE_NR_CopyDict; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB); }//Dbdih::nodeDictStartConfLab() @@ -1889,7 +1947,7 @@ void Dbdih::gcpBlockedLab(Signal* signal) /*-----------------------------------------------------------------*/ // Report that node restart has completed copy of distribution info. 
/*-----------------------------------------------------------------*/ - signal->theData[0] = EventReport::NR_CopyDistr; + signal->theData[0] = NDB_LE_NR_CopyDistr; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB); /** @@ -1964,9 +2022,6 @@ void Dbdih::execINCL_NODECONF(Signal* signal) signal->theData[0] = reference(); signal->theData[1] = c_nodeStartSlave.nodeId; sendSignal(BACKUP_REF, GSN_INCL_NODEREQ, signal, 2, JBB); - - // Suma will not send response to this for now, later... - sendSignal(SUMA_REF, GSN_INCL_NODEREQ, signal, 2, JBB); return; }//if if (TstartNode_or_blockref == numberToRef(BACKUP, getOwnNodeId())){ @@ -2050,7 +2105,7 @@ void Dbdih::execSTART_COPYREQ(Signal* signal) /*-------------------------------------------------------------------------*/ // REPORT Copy process of node restart is now about to start up. /*-------------------------------------------------------------------------*/ - signal->theData[0] = EventReport::NR_CopyFragsStarted; + signal->theData[0] = NDB_LE_NR_CopyFragsStarted; signal->theData[1] = startNodeId; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -3031,7 +3086,7 @@ void Dbdih::execCREATE_FRAGCONF(Signal* signal) /* --------------------------------------------------------------------- */ // REPORT that copy of fragment has been completed. 
/* --------------------------------------------------------------------- */ - signal->theData[0] = EventReport::NR_CopyFragDone; + signal->theData[0] = NDB_LE_NR_CopyFragDone; signal->theData[1] = takeOverPtr.p->toStartingNode; signal->theData[2] = tabPtr.i; signal->theData[3] = takeOverPtr.p->toCurrentFragid; @@ -3269,7 +3324,7 @@ Dbdih::switchPrimaryMutex_locked(Signal* signal, Uint32 toPtrI, Uint32 retVal){ void Dbdih::toCopyCompletedLab(Signal * signal, TakeOverRecordPtr takeOverPtr) { - signal->theData[0] = EventReport::NR_CopyFragsCompleted; + signal->theData[0] = NDB_LE_NR_CopyFragsCompleted; signal->theData[1] = takeOverPtr.p->toStartingNode; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -3593,9 +3648,7 @@ void Dbdih::selectMasterCandidateAndSend(Signal* signal) " Initial start needs to be performed " " when changing no of replicas (%d != %d)", node_groups[nodePtr.i], cnoReplicas); - progError(__LINE__, - ERR_INVALID_CONFIG, - buf); + progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf); } } }//Dbdih::selectMasterCandidate() @@ -3802,6 +3855,11 @@ void Dbdih::execNODE_FAILREP(Signal* signal) /*------------------------------------------------------------------------*/ // Verify that a starting node has also crashed. Reset the node start record. /*-------------------------------------------------------------------------*/ +#if 0 + /** + * Node will crash by itself... + * nodeRestart is run then... 
+ */ if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE) { BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode); @@ -3813,6 +3871,7 @@ void Dbdih::execNODE_FAILREP(Signal* signal) sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA); nodeResetStart(); }//if +#endif /*--------------------------------------------------*/ /* */ @@ -3884,9 +3943,7 @@ void Dbdih::execNODE_FAILREP(Signal* signal) if(getNodeState().getNodeRestartInProgress()){ jam(); - progError(__LINE__, - ERR_SYSTEM_ERROR, - "Unhandle master failure during node restart"); + progError(__LINE__, NDBD_EXIT_MASTER_FAILURE_DURING_NR); } } @@ -4550,7 +4607,7 @@ void Dbdih::startGcpMasterTakeOver(Signal* signal, Uint32 oldMasterId){ sendLoopMacro(MASTER_GCPREQ, sendMASTER_GCPREQ); cgcpMasterTakeOverState = GMTOS_INITIAL; - signal->theData[0] = EventReport::GCP_TakeoverStarted; + signal->theData[0] = NDB_LE_GCP_TakeoverStarted; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB); setLocalNodefailHandling(signal, oldMasterId, NF_GCP_TAKE_OVER); @@ -5059,7 +5116,7 @@ void Dbdih::MASTER_GCPhandling(Signal* signal, Uint32 failedNodeId) break; }//switch - signal->theData[0] = EventReport::GCP_TakeoverCompleted; + signal->theData[0] = NDB_LE_GCP_TakeoverCompleted; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB); /*--------------------------------------------------*/ @@ -5509,7 +5566,7 @@ Dbdih::checkEmptyLcpComplete(Signal *signal){ if(isMaster()){ jam(); - signal->theData[0] = EventReport::LCP_TakeoverStarted; + signal->theData[0] = NDB_LE_LCP_TakeoverStarted; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB); signal->theData[0] = 7012; @@ -5981,7 +6038,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) ndbrequire(false); break; }//switch - signal->theData[0] = EventReport::LCP_TakeoverCompleted; + signal->theData[0] = NDB_LE_LCP_TakeoverCompleted; signal->theData[1] = 
c_lcpMasterTakeOverState.state; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -6013,7 +6070,7 @@ void Dbdih::execNF_COMPLETEREP(Signal* signal) /* -------------------------------------------------------------------- */ // Report the event that DBTC completed node failure handling. /* -------------------------------------------------------------------- */ - signal->theData[0] = EventReport::NodeFailCompleted; + signal->theData[0] = NDB_LE_NodeFailCompleted; signal->theData[1] = DBTC; signal->theData[2] = failedNodePtr.i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); @@ -6026,7 +6083,7 @@ void Dbdih::execNF_COMPLETEREP(Signal* signal) /* --------------------------------------------------------------------- */ // Report the event that DBDICT completed node failure handling. /* --------------------------------------------------------------------- */ - signal->theData[0] = EventReport::NodeFailCompleted; + signal->theData[0] = NDB_LE_NodeFailCompleted; signal->theData[1] = DBDICT; signal->theData[2] = failedNodePtr.i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); @@ -6039,7 +6096,7 @@ void Dbdih::execNF_COMPLETEREP(Signal* signal) /* --------------------------------------------------------------------- */ // Report the event that DBDIH completed node failure handling. /* --------------------------------------------------------------------- */ - signal->theData[0] = EventReport::NodeFailCompleted; + signal->theData[0] = NDB_LE_NodeFailCompleted; signal->theData[1] = DBDIH; signal->theData[2] = failedNodePtr.i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); @@ -6052,7 +6109,7 @@ void Dbdih::execNF_COMPLETEREP(Signal* signal) /* --------------------------------------------------------------------- */ // Report the event that DBDIH completed node failure handling. 
/* --------------------------------------------------------------------- */ - signal->theData[0] = EventReport::NodeFailCompleted; + signal->theData[0] = NDB_LE_NodeFailCompleted; signal->theData[1] = DBLQH; signal->theData[2] = failedNodePtr.i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); @@ -6087,7 +6144,7 @@ void Dbdih::execNF_COMPLETEREP(Signal* signal) /* -------------------------------------------------------------------- */ // Report the event that nodeId has completed node failure handling. /* -------------------------------------------------------------------- */ - signal->theData[0] = EventReport::NodeFailCompleted; + signal->theData[0] = NDB_LE_NodeFailCompleted; signal->theData[1] = 0; signal->theData[2] = failedNodePtr.i; signal->theData[3] = nodeId; @@ -6160,7 +6217,7 @@ void Dbdih::nodeFailCompletedCheckLab(Signal* signal, /* ---------------------------------------------------------------------- */ // Report the event that all nodes completed node failure handling. /* ---------------------------------------------------------------------- */ - signal->theData[0] = EventReport::NodeFailCompleted; + signal->theData[0] = NDB_LE_NodeFailCompleted; signal->theData[1] = 0; signal->theData[2] = failedNodePtr.i; signal->theData[3] = 0; @@ -6315,9 +6372,12 @@ void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal){ if (primaryTableId == RNIL) { if(fragmentNode == 0){ jam(); - // needs to be fixed for single fragment tables - NGPtr.i = 0; //c_nextNodeGroup; - c_nextNodeGroup = (NGPtr.i + 1 == cnoOfNodeGroups ? 0 : NGPtr.i + 1); + NGPtr.i = 0; + if(noOfFragments < csystemnodes) + { + NGPtr.i = c_nextNodeGroup; + c_nextNodeGroup = (NGPtr.i + 1 == cnoOfNodeGroups ? 0 : NGPtr.i + 1); + } } else if(! 
(fragmentNode < MAX_NDB_NODES)) { jam(); err = CreateFragmentationRef::InvalidNodeId; @@ -6370,33 +6430,28 @@ void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal){ } } - //@todo use section writer Uint32 count = 2; - Uint32 fragments[2 + 8*MAX_REPLICAS*MAX_NDB_NODES]; - Uint32 next_replica_node[MAX_NDB_NODES]; - memset(next_replica_node,0,sizeof(next_replica_node)); + Uint16 *fragments = (Uint16*)(signal->theData+25); if (primaryTableId == RNIL) { jam(); + Uint8 next_replica_node[MAX_NDB_NODES]; + memset(next_replica_node,0,sizeof(next_replica_node)); for(Uint32 fragNo = 0; fragNo<noOfFragments; fragNo++){ jam(); ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord); - - Uint32 ind = next_replica_node[NGPtr.i]; const Uint32 max = NGPtr.p->nodeCount; - - //------------------------------------------------------------------- - // We make an extra step to ensure that the primary replicas are - // spread among the nodes. - //------------------------------------------------------------------- - next_replica_node[NGPtr.i] = (ind + 1 >= max ? 0 : ind + 1); - - for(Uint32 replicaNo = 0; replicaNo<noOfReplicas; replicaNo++){ + + Uint32 tmp= next_replica_node[NGPtr.i]; + for(Uint32 replicaNo = 0; replicaNo<noOfReplicas; replicaNo++) + { jam(); - const Uint32 nodeId = NGPtr.p->nodesInGroup[ind++]; + const Uint32 nodeId = NGPtr.p->nodesInGroup[tmp++]; fragments[count++] = nodeId; - ind = (ind == max ? 0 : ind); + tmp = (tmp >= max ? 0 : tmp); } - + tmp++; + next_replica_node[NGPtr.i]= (tmp >= max ? 
0 : tmp); + /** * Next node group for next fragment */ @@ -6445,26 +6500,42 @@ void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal){ fragments[0] = noOfReplicas; fragments[1] = noOfFragments; - LinearSectionPtr ptr[3]; - ptr[0].p = &fragments[0]; - ptr[0].sz = count; - sendSignal(senderRef, - GSN_CREATE_FRAGMENTATION_CONF, - signal, - CreateFragmentationConf::SignalLength, - JBB, - ptr, - 1); + if(senderRef != 0) + { + LinearSectionPtr ptr[3]; + ptr[0].p = (Uint32*)&fragments[0]; + ptr[0].sz = (count + 1) / 2; + sendSignal(senderRef, + GSN_CREATE_FRAGMENTATION_CONF, + signal, + CreateFragmentationConf::SignalLength, + JBB, + ptr, + 1); + } + else + { + // Execute direct + signal->theData[0] = 0; + } return; } while(false); - - CreateFragmentationRef * const ref = - (CreateFragmentationRef*)signal->getDataPtrSend(); - ref->senderRef = reference(); - ref->senderData = senderData; - ref->errorCode = err; - sendSignal(senderRef, GSN_CREATE_FRAGMENTATION_REF, signal, - CreateFragmentationRef::SignalLength, JBB); + + if(senderRef != 0) + { + CreateFragmentationRef * const ref = + (CreateFragmentationRef*)signal->getDataPtrSend(); + ref->senderRef = reference(); + ref->senderData = senderData; + ref->errorCode = err; + sendSignal(senderRef, GSN_CREATE_FRAGMENTATION_REF, signal, + CreateFragmentationRef::SignalLength, JBB); + } + else + { + // Execute direct + signal->theData[0] = err; + } } void Dbdih::execDIADDTABREQ(Signal* signal) @@ -6532,12 +6603,15 @@ void Dbdih::execDIADDTABREQ(Signal* signal) tabPtr.p->method = TabRecord::HASH; tabPtr.p->kvalue = req->kValue; - Uint32 fragments[2 + 8*MAX_REPLICAS*MAX_NDB_NODES]; + union { + Uint16 fragments[2 + MAX_FRAG_PER_NODE*MAX_REPLICAS*MAX_NDB_NODES]; + Uint32 align; + }; SegmentedSectionPtr fragDataPtr; signal->getSection(fragDataPtr, DiAddTabReq::FRAGMENTATION); - copy(fragments, fragDataPtr); + copy((Uint32*)fragments, fragDataPtr); releaseSections(signal); - + const Uint32 noReplicas = fragments[0]; const Uint32 
noFragments = fragments[1]; @@ -6546,6 +6620,7 @@ void Dbdih::execDIADDTABREQ(Signal* signal) ndbrequire(noReplicas == cnoReplicas); // Only allowed if (ERROR_INSERTED(7173)) { + CLEAR_ERROR_INSERT_VALUE; addtabrefuseLab(signal, connectPtr, ZREPLERROR1); return; } @@ -6965,8 +7040,7 @@ void Dbdih::execDIGETNODESREQ(Signal* signal) TabRecord* regTabDesc = tabRecord; jamEntry(); ptrCheckGuard(tabPtr, ttabFileSize, regTabDesc); - hashValue = hashValue >> tabPtr.p->kvalue; - Uint32 fragId = tabPtr.p->mask & hashValue; + Uint32 fragId = hashValue & tabPtr.p->mask; ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE); if (fragId < tabPtr.p->hashpointer) { jam(); @@ -7327,7 +7401,7 @@ void Dbdih::startGcpLab(Signal* signal, Uint32 aWaitTime) /***************************************************************************/ // Report the event that a global checkpoint has started. /***************************************************************************/ - signal->theData[0] = EventReport::GlobalCheckpointStarted; //Event type + signal->theData[0] = NDB_LE_GlobalCheckpointStarted; //Event type signal->theData[1] = cnewgcp; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -7590,6 +7664,22 @@ void Dbdih::execDIHNDBTAMPER(Signal* signal) #ifdef ERROR_INSERT case 5: jam(); + if(tuserpointer == 0) + { + jam(); + signal->theData[0] = 0; + sendSignal(QMGR_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(NDBCNTR_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(NDBFS_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(DBACC_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(DBTUP_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(DBLQH_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(DBDICT_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(DBDIH_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(DBTC_REF, GSN_NDB_TAMPER, signal, 1, JBB); + sendSignal(CMVMI_REF, GSN_NDB_TAMPER, signal, 1, JBB); + return; + } 
/*----------------------------------------------------------------------*/ // Insert errors. /*----------------------------------------------------------------------*/ @@ -7808,7 +7898,7 @@ void Dbdih::execCOPY_GCICONF(Signal* signal) // Report the event that a global checkpoint has completed. /************************************************************************/ signal->setTrace(0); - signal->theData[0] = EventReport::GlobalCheckpointCompleted; //Event type + signal->theData[0] = NDB_LE_GlobalCheckpointCompleted; //Event type signal->theData[1] = coldgcp; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -7930,12 +8020,6 @@ void Dbdih::writingCopyGciLab(Signal* signal, FileRecordPtr filePtr) if (reason == CopyGCIReq::GLOBAL_CHECKPOINT) { jam(); cgcpParticipantState = GCP_PARTICIPANT_READY; - - SubGcpCompleteRep * const rep = (SubGcpCompleteRep*)signal->getDataPtr(); - rep->gci = coldgcp; - rep->senderData = 0; - sendSignal(SUMA_REF, GSN_SUB_GCP_COMPLETE_REP, signal, - SubGcpCompleteRep::SignalLength, JBB); } jam(); @@ -8168,11 +8252,21 @@ void Dbdih::openingTableErrorLab(Signal* signal, FileRecordPtr filePtr) /* WE FAILED IN OPENING A FILE. IF THE FIRST FILE THEN TRY WITH THE */ /* DUPLICATE FILE, OTHERWISE WE REPORT AN ERROR IN THE SYSTEM RESTART. 
*/ /* ---------------------------------------------------------------------- */ - ndbrequire(filePtr.i == tabPtr.p->tabFile[0]); - filePtr.i = tabPtr.p->tabFile[1]; - ptrCheckGuard(filePtr, cfileFileSize, fileRecord); - openFileRw(signal, filePtr); - filePtr.p->reqStatus = FileRecord::OPENING_TABLE; + if (filePtr.i == tabPtr.p->tabFile[0]) + { + filePtr.i = tabPtr.p->tabFile[1]; + ptrCheckGuard(filePtr, cfileFileSize, fileRecord); + openFileRw(signal, filePtr); + filePtr.p->reqStatus = FileRecord::OPENING_TABLE; + } + else + { + char buf[256]; + BaseString::snprintf(buf, sizeof(buf), + "Error opening DIH schema files for table: %d", + tabPtr.i); + progError(__LINE__, NDBD_EXIT_AFS_NO_SUCH_FILE, buf); + } }//Dbdih::openingTableErrorLab() void Dbdih::readingTableLab(Signal* signal, FileRecordPtr filePtr) @@ -8294,7 +8388,7 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){ /* --------------------------------------------------------------- */ /* THE NODE IS ALIVE AND KICKING AND ACTIVE, LET'S USE IT. */ /* --------------------------------------------------------------- */ - arrGuard(noCrashedReplicas, 8); + arrGuardErr(noCrashedReplicas, 8, NDBD_EXIT_MAX_CRASHED_REPLICAS); Uint32 lastGci = replicaPtr.p->replicaLastGci[noCrashedReplicas]; if(lastGci >= newestRestorableGCI){ jam(); @@ -8313,7 +8407,7 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){ *--------_----------------------------------------------------- */ const Uint32 nextCrashed = noCrashedReplicas + 1; replicaPtr.p->noCrashedReplicas = nextCrashed; - arrGuard(nextCrashed, 8); + arrGuardErr(nextCrashed, 8, NDBD_EXIT_MAX_CRASHED_REPLICAS); replicaPtr.p->createGci[nextCrashed] = newestRestorableGCI + 1; ndbrequire(newestRestorableGCI + 1 != 0xF1F1F1F1); replicaPtr.p->replicaLastGci[nextCrashed] = (Uint32)-1; @@ -8768,14 +8862,10 @@ void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId) /* THIS WILL DECREASE THE GCI TO RESTORE WHICH HOPEFULLY WILL MAKE IT */ /* POSSIBLE TO RESTORE THE SYSTEM. 
*/ /* --------------------------------------------------------------------- */ - char buf[100]; - BaseString::snprintf(buf, sizeof(buf), - "Unable to find restorable replica for " - "table: %d fragment: %d gci: %d", - tableId, fragId, SYSFILE->newestRestorableGCI); - progError(__LINE__, - ERR_SYSTEM_ERROR, - buf); + char buf[64]; + BaseString::snprintf(buf, sizeof(buf), "table: %d fragment: %d gci: %d", + tableId, fragId, SYSFILE->newestRestorableGCI); + progError(__LINE__, NDBD_EXIT_NO_RESTORABLE_REPLICA, buf); ndbrequire(false); return; }//if @@ -9312,7 +9402,7 @@ void Dbdih::execTCGETOPSIZECONF(Signal* signal) ndbrequire(((int)c_lcpState.oldestRestorableGci) > 0); if (ERROR_INSERTED(7011)) { - signal->theData[0] = EventReport::LCPStoppedInCalcKeepGci; + signal->theData[0] = NDB_LE_LCPStoppedInCalcKeepGci; signal->theData[1] = 0; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); return; @@ -9398,7 +9488,7 @@ void Dbdih::storeNewLcpIdLab(Signal* signal) /***************************************************************************/ // Report the event that a local checkpoint has started. /***************************************************************************/ - signal->theData[0] = EventReport::LocalCheckpointStarted; //Event type + signal->theData[0] = NDB_LE_LocalCheckpointStarted; //Event type signal->theData[1] = SYSFILE->latestLCP_ID + 1; signal->theData[2] = c_lcpState.keepGci; signal->theData[3] = c_lcpState.oldestRestorableGci; @@ -9778,7 +9868,7 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal) /* --------------------------------------------------------------------- */ // REPORT that local checkpoint have completed this fragment. 
/* --------------------------------------------------------------------- */ - signal->theData[0] = EventReport::LCPFragmentCompleted; + signal->theData[0] = NDB_LE_LCPFragmentCompleted; signal->theData[1] = nodeId; signal->theData[2] = tableId; signal->theData[3] = fragId; @@ -10229,7 +10319,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal) /***************************************************************************/ // Report the event that a local checkpoint has completed. /***************************************************************************/ - signal->theData[0] = EventReport::LocalCheckpointCompleted; //Event type + signal->theData[0] = NDB_LE_LocalCheckpointCompleted; //Event type signal->theData[1] = SYSFILE->latestLCP_ID; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); c_lcpState.lcpStopGcp = c_newest_restorable_gci; @@ -10387,7 +10477,91 @@ void Dbdih::tableCloseLab(Signal* signal, FileRecordPtr filePtr) * GCP stop detected, * send SYSTEM_ERROR to all other alive nodes */ -void Dbdih::crashSystemAtGcpStop(Signal* signal){ +void Dbdih::crashSystemAtGcpStop(Signal* signal) +{ + switch(cgcpStatus){ + case GCP_NODE_FINISHED: + { + /** + * We're waiting for a GCP save conf + */ + ndbrequire(!c_GCP_SAVEREQ_Counter.done()); + NodeReceiverGroup rg(DBLQH, c_GCP_SAVEREQ_Counter); + signal->theData[0] = 2305; + sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB); + + infoEvent("Detected GCP stop...sending kill to %s", + c_GCP_SAVEREQ_Counter.getText()); + ndbout_c("Detected GCP stop...sending kill to %s", + c_GCP_SAVEREQ_Counter.getText()); + return; + } + case GCP_SAVE_LQH_FINISHED: + ndbout_c("m_copyReason: %d m_waiting: %d", + c_copyGCIMaster.m_copyReason, + c_copyGCIMaster.m_waiting); + break; + case GCP_READY: // shut up lint + case GCP_PREPARE_SENT: + case GCP_COMMIT_SENT: + break; + } + + ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", + c_copyGCISlave.m_senderData, + c_copyGCISlave.m_senderRef, + 
c_copyGCISlave.m_copyReason, + c_copyGCISlave.m_expectedNextWord); + + FileRecordPtr file0Ptr; + file0Ptr.i = crestartInfoFile[0]; + ptrCheckGuard(file0Ptr, cfileFileSize, fileRecord); + FileRecordPtr file1Ptr; + file1Ptr.i = crestartInfoFile[1]; + ptrCheckGuard(file1Ptr, cfileFileSize, fileRecord); + + ndbout_c("file[0] status: %d type: %d reqStatus: %d file1: %d %d %d", + file0Ptr.p->fileStatus, file0Ptr.p->fileType, file0Ptr.p->reqStatus, + file1Ptr.p->fileStatus, file1Ptr.p->fileType, file1Ptr.p->reqStatus + ); + + signal->theData[0] = 404; + signal->theData[1] = file0Ptr.p->fileRef; + EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2); + + signal->theData[0] = 404; + signal->theData[1] = file1Ptr.p->fileRef; + EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2); + + ndbout_c("c_COPY_GCIREQ_Counter = %s", + c_COPY_GCIREQ_Counter.getText()); + ndbout_c("c_COPY_TABREQ_Counter = %s", + c_COPY_TABREQ_Counter.getText()); + ndbout_c("c_CREATE_FRAGREQ_Counter = %s", + c_CREATE_FRAGREQ_Counter.getText()); + ndbout_c("c_DIH_SWITCH_REPLICA_REQ_Counter = %s", + c_DIH_SWITCH_REPLICA_REQ_Counter.getText()); + ndbout_c("c_EMPTY_LCP_REQ_Counter = %s",c_EMPTY_LCP_REQ_Counter.getText()); + ndbout_c("c_END_TOREQ_Counter = %s", c_END_TOREQ_Counter.getText()); + ndbout_c("c_GCP_COMMIT_Counter = %s", c_GCP_COMMIT_Counter.getText()); + ndbout_c("c_GCP_PREPARE_Counter = %s", c_GCP_PREPARE_Counter.getText()); + ndbout_c("c_GCP_SAVEREQ_Counter = %s", c_GCP_SAVEREQ_Counter.getText()); + ndbout_c("c_INCL_NODEREQ_Counter = %s", c_INCL_NODEREQ_Counter.getText()); + ndbout_c("c_MASTER_GCPREQ_Counter = %s", + c_MASTER_GCPREQ_Counter.getText()); + ndbout_c("c_MASTER_LCPREQ_Counter = %s", + c_MASTER_LCPREQ_Counter.getText()); + ndbout_c("c_START_INFOREQ_Counter = %s", + c_START_INFOREQ_Counter.getText()); + ndbout_c("c_START_RECREQ_Counter = %s", c_START_RECREQ_Counter.getText()); + ndbout_c("c_START_TOREQ_Counter = %s", c_START_TOREQ_Counter.getText()); + ndbout_c("c_STOP_ME_REQ_Counter = 
%s", c_STOP_ME_REQ_Counter.getText()); + ndbout_c("c_TC_CLOPSIZEREQ_Counter = %s", + c_TC_CLOPSIZEREQ_Counter.getText()); + ndbout_c("c_TCGETOPSIZEREQ_Counter = %s", + c_TCGETOPSIZEREQ_Counter.getText()); + ndbout_c("c_UPDATE_TOREQ_Counter = %s", c_UPDATE_TOREQ_Counter.getText()); + NodeRecordPtr nodePtr; for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); @@ -10543,7 +10717,7 @@ void Dbdih::calculateHotSpare() break; default: jam(); - progError(0, 0); + ndbrequire(false); break; }//switch }//Dbdih::calculateHotSpare() @@ -10576,7 +10750,7 @@ void Dbdih::checkEscalation() jam(); if (TnodeGroup[i] == ZFALSE) { jam(); - progError(__LINE__, ERR_SYSTEM_ERROR, "Lost node group"); + progError(__LINE__, NDBD_EXIT_LOST_NODE_GROUP, "Lost node group"); }//if }//for }//Dbdih::checkEscalation() @@ -11086,7 +11260,7 @@ void Dbdih::initCommonData() ndb_mgm_get_int_parameter(p, CFG_DB_NO_REPLICAS, &cnoReplicas); if (cnoReplicas > 4) { - progError(__LINE__, ERR_INVALID_CONFIG, + progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, "Only up to four replicas are supported. Check NoOfReplicas."); } @@ -11565,16 +11739,14 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) NodeRecordPtr mngNodeptr; Uint32 tmngNode; Uint32 tmngNodeGroup; - Uint32 tmngReplica; Uint32 tmngLimit; - Uint32 i; + Uint32 i, j; /**----------------------------------------------------------------------- * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. 
HOT SPARE NODES ARE ASSIGNED * TO NODE GROUP ZNIL *-----------------------------------------------------------------------*/ tmngNodeGroup = 0; - tmngReplica = 0; tmngLimit = csystemnodes - cnoHotSpare; ndbrequire(tmngLimit < MAX_NDB_NODES); for (i = 0; i < tmngLimit; i++) { @@ -11586,13 +11758,11 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) mngNodeptr.p->nodeGroup = tmngNodeGroup; NGPtr.i = tmngNodeGroup; ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord); - arrGuard(tmngReplica, MAX_REPLICAS); - NGPtr.p->nodesInGroup[tmngReplica] = mngNodeptr.i; - tmngReplica++; - if (tmngReplica == cnoReplicas) { + arrGuard(NGPtr.p->nodeCount, MAX_REPLICAS); + NGPtr.p->nodesInGroup[NGPtr.p->nodeCount++] = mngNodeptr.i; + if (NGPtr.p->nodeCount == cnoReplicas) { jam(); tmngNodeGroup++; - tmngReplica = 0; }//if }//for cnoOfNodeGroups = tmngNodeGroup; @@ -11616,6 +11786,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup); }//if }//for + + for (i = 0; i<cnoOfNodeGroups; i++) + { + jam(); + bool alive = false; + NodeGroupRecordPtr NGPtr; + NGPtr.i = i; + ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord); + for (j = 0; j<NGPtr.p->nodeCount; j++) + { + jam(); + mngNodeptr.i = NGPtr.p->nodesInGroup[j]; + ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord); + if (checkNodeAlive(NGPtr.p->nodesInGroup[j])) + { + alive = true; + break; + } + } + + if (!alive) + { + char buf[255]; + BaseString::snprintf + (buf, sizeof(buf), + "Illegal initial start, no alive node in nodegroup %u", i); + progError(__LINE__, + NDBD_EXIT_SR_RESTARTCONFLICT, + buf); + + } + } }//Dbdih::makeNodeGroups() /** @@ -11751,7 +11953,8 @@ void Dbdih::newCrashedReplica(Uint32 nodeId, ReplicaRecordPtr ncrReplicaPtr) /* THAT THE NEW REPLICA IS NOT STARTED YET AND REPLICA_LAST_GCI IS*/ /* SET TO -1 TO INDICATE THAT IT IS NOT DEAD YET. 
*/ /*----------------------------------------------------------------------*/ - arrGuard(ncrReplicaPtr.p->noCrashedReplicas + 1, 8); + arrGuardErr(ncrReplicaPtr.p->noCrashedReplicas + 1, 8, + NDBD_EXIT_MAX_CRASHED_REPLICAS); ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] = SYSFILE->lastCompletedGCI[nodeId]; ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1; @@ -12423,7 +12626,6 @@ void Dbdih::sendStartFragreq(Signal* signal, void Dbdih::setInitialActiveStatus() { NodeRecordPtr siaNodeptr; - Uint32 tsiaNodeActiveStatus; Uint32 tsiaNoActiveNodes; tsiaNoActiveNodes = csystemnodes - cnoHotSpare; @@ -12431,39 +12633,34 @@ void Dbdih::setInitialActiveStatus() SYSFILE->nodeStatus[i] = 0; for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) { ptrAss(siaNodeptr, nodeRecord); - if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) { + switch(siaNodeptr.p->nodeStatus){ + case NodeRecord::ALIVE: + case NodeRecord::DEAD: if (tsiaNoActiveNodes == 0) { jam(); siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare; } else { jam(); tsiaNoActiveNodes = tsiaNoActiveNodes - 1; - siaNodeptr.p->activeStatus = Sysfile::NS_Active; - }//if - } else { - jam(); - siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; - }//if - switch (siaNodeptr.p->activeStatus) { - case Sysfile::NS_Active: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_Active; - break; - case Sysfile::NS_HotSpare: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_HotSpare; - break; - case Sysfile::NS_NotDefined: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_NotDefined; + if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) + { + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_Active; + } + else + { + siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; + } + } break; default: - ndbrequire(false); - return; + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; break; - }//switch - Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus, - 
tsiaNodeActiveStatus); + }//if + Sysfile::setNodeStatus(siaNodeptr.i, + SYSFILE->nodeStatus, + siaNodeptr.p->activeStatus); }//for }//Dbdih::setInitialActiveStatus() @@ -14499,3 +14696,118 @@ Dbdih::NodeRecord::NodeRecord(){ copyCompleted = false; allowNodeStart = true; } + +// DICT lock slave + +void +Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c) +{ + DictLockReq* req = (DictLockReq*)&signal->theData[0]; + DictLockSlavePtr lockPtr; + + c_dictLockSlavePool.seize(lockPtr); + ndbrequire(lockPtr.i != RNIL); + + req->userPtr = lockPtr.i; + req->lockType = lockType; + req->userRef = reference(); + + lockPtr.p->lockPtr = RNIL; + lockPtr.p->lockType = lockType; + lockPtr.p->locked = false; + lockPtr.p->callback = c; + + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + unsigned int get_minor = getMinor(masterVersion); + unsigned int get_build = getBuild(masterVersion); + + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + jam(); + + infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u", + (unsigned int)cmasterNodeId, get_major, get_minor, get_build); + + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + conf->userPtr = lockPtr.i; + conf->lockType = lockType; + conf->lockPtr = ZNIL; + + sendSignal(reference(), GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); + return; + } + } + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal, + DictLockReq::SignalLength, JBB); +} + +void +Dbdih::execDICT_LOCK_CONF(Signal* signal) +{ + jamEntry(); + recvDictLockConf(signal); +} + +void +Dbdih::execDICT_LOCK_REF(Signal* signal) +{ + jamEntry(); + ndbrequire(false); +} + +void +Dbdih::recvDictLockConf(Signal* signal) +{ + const DictLockConf* conf = (const DictLockConf*)&signal->theData[0]; 
+ + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr); + + lockPtr.p->lockPtr = conf->lockPtr; + ndbrequire(lockPtr.p->lockType == conf->lockType); + ndbrequire(lockPtr.p->locked == false); + lockPtr.p->locked = true; + + lockPtr.p->callback.m_callbackData = lockPtr.i; + execute(signal, lockPtr.p->callback, 0); +} + +void +Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI) +{ + DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0]; + + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI); + + ord->lockPtr = lockPtr.p->lockPtr; + ord->lockType = lockPtr.p->lockType; + + c_dictLockSlavePool.release(lockPtr); + + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + return; + } + } + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, + DictUnlockOrd::SignalLength, JBB); +} diff --git a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index 7cca121d909..d3ba8521226 100644 --- a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -29,6 +29,9 @@ #include <signaldata/LqhTransConf.hpp> #include <signaldata/LqhFrag.hpp> +// primary key is stored in TUP +#include <../dbtup/Dbtup.hpp> + #ifdef DBLQH_C // Constants /* ------------------------------------------------------------------------- */ @@ -441,7 +444,6 @@ public: UintR dictConnectptr; UintR fragmentPtr; UintR nextAddfragrec; - UintR noOfAllocPages; UintR schemaVer; UintR tup1Connectptr; UintR tup2Connectptr; @@ -463,12 +465,17 @@ public: Uint16 totalAttrReceived; Uint16 fragCopyCreation; Uint16 noOfKeyAttr; - Uint32 noOfNewAttr; // noOfCharsets in upper half + Uint16 noOfNewAttr; + Uint16 noOfCharsets; 
Uint16 noOfAttributeGroups; Uint16 lh3DistrBits; Uint16 tableType; Uint16 primaryTableId; - };// Size 108 bytes + Uint32 maxRowsLow; + Uint32 maxRowsHigh; + Uint32 minRowsLow; + Uint32 minRowsHigh; + };// Size 124 bytes typedef Ptr<AddFragRecord> AddFragRecordPtr; /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */ @@ -512,9 +519,7 @@ public: WAIT_DELETE_STORED_PROC_ID_COPY = 6, WAIT_ACC_COPY = 7, WAIT_ACC_SCAN = 8, - WAIT_SCAN_KEYINFO = 9, WAIT_SCAN_NEXTREQ = 10, - WAIT_COPY_KEYINFO = 11, WAIT_CLOSE_SCAN = 12, WAIT_CLOSE_COPY = 13, WAIT_RELEASE_LOCK = 14, @@ -576,6 +581,9 @@ public: NodeId scanNodeId; Uint16 scanReleaseCounter; Uint16 scanNumber; + + // scan source block ACC TUX TUP + BlockReference scanBlockref; Uint8 scanCompletedStatus; Uint8 scanFlag; @@ -583,6 +591,8 @@ public: Uint8 scanLockMode; Uint8 readCommitted; Uint8 rangeScan; + Uint8 descending; + Uint8 tupScan; Uint8 scanTcWaiting; Uint8 scanKeyinfoFlag; Uint8 m_last_row; @@ -879,10 +889,6 @@ public: */ Uint8 fragDistributionKey; /** - * Used to calculate which local fragment to use. - */ - Uint8 hashCheckBit; - /** * The identity of the next local checkpoint this fragment * should perform. 
*/ @@ -2022,8 +2028,10 @@ public: BlockReference tcTuxBlockref; BlockReference tcTupBlockref; Uint32 commitAckMarker; - UintR noFiredTriggers; - + union { + Uint32 m_scan_curr_range_no; + UintR noFiredTriggers; + }; Uint16 errorCode; Uint16 logStartPageIndex; Uint16 logStartPageNo; @@ -2045,6 +2053,7 @@ public: Uint8 opExec; Uint8 operation; Uint8 reclenAiLqhkey; + Uint8 m_offset_current_keybuf; Uint8 replicaType; Uint8 simpleRead; Uint8 seqNoReplica; @@ -2148,8 +2157,6 @@ private: void execACC_SCANREF(Signal* signal); void execNEXT_SCANCONF(Signal* signal); void execNEXT_SCANREF(Signal* signal); - void execACC_SCAN_INFO(Signal* signal); - void execACC_SCAN_INFO24(Signal* signal); void execACC_TO_REF(Signal* signal); void execSTORED_PROCCONF(Signal* signal); void execSTORED_PROCREF(Signal* signal); @@ -2239,7 +2246,7 @@ private: void LQHKEY_abort(Signal* signal, int errortype); void LQHKEY_error(Signal* signal, int errortype); void nextRecordCopy(Signal* signal); - void calculateHash(Signal* signal); + Uint32 calculateHash(Uint32 tableId, const Uint32* src); void continueAfterCheckLcpStopBlocked(Signal* signal); void checkLcpStopBlockedLab(Signal* signal); void sendCommittedTc(Signal* signal, BlockReference atcBlockref); @@ -2267,7 +2274,7 @@ private: void finishScanrec(Signal* signal); void releaseScanrec(Signal* signal); void seizeScanrec(Signal* signal); - void sendKeyinfo20(Signal* signal, ScanRecord *, TcConnectionrec *); + Uint32 sendKeyinfo20(Signal* signal, ScanRecord *, TcConnectionrec *); void sendScanFragConf(Signal* signal, Uint32 scanCompleted); void initCopyrec(Signal* signal); void initCopyTc(Signal* signal); @@ -2397,6 +2404,8 @@ private: void seizeAttrinbuf(Signal* signal); Uint32 seize_attrinbuf(); Uint32 release_attrinbuf(Uint32); + Uint32 copy_bounds(Uint32 * dst, TcConnectionrec*); + void seizeFragmentrec(Signal* signal); void seizePageRef(Signal* signal); void seizeTcrec(); @@ -2408,7 +2417,7 @@ private: void startNextExecSr(Signal* signal); 
void startTimeSupervision(Signal* signal); void stepAhead(Signal* signal, Uint32 stepAheadWords); - void systemError(Signal* signal); + void systemError(Signal* signal, int line); void writeAbortLog(Signal* signal); void writeCommitLog(Signal* signal, LogPartRecordPtr regLogPartPtr); void writeCompletedGciLog(Signal* signal); @@ -2427,7 +2436,7 @@ private: Uint32 calcPageCheckSum(LogPageRecordPtr logP); // Generated statement blocks - void systemErrorLab(Signal* signal); + void systemErrorLab(Signal* signal, int line); void initFourth(Signal* signal); void packLqhkeyreqLab(Signal* signal); void sendNdbSttorryLab(Signal* signal); @@ -2437,7 +2446,6 @@ private: void srLogLimits(Signal* signal); void srGciLimits(Signal* signal); void srPhase3Start(Signal* signal); - void warningHandlerLab(Signal* signal); void checkStartCompletedLab(Signal* signal); void continueAbortLab(Signal* signal); void abortContinueAfterBlockedLab(Signal* signal, bool canBlock); @@ -2445,7 +2453,6 @@ private: void localCommitLab(Signal* signal); void abortErrorLab(Signal* signal); void continueAfterReceivingAllAiLab(Signal* signal); - void sendScanFragRefLateLab(Signal* signal); void abortStateHandlerLab(Signal* signal); void writeAttrinfoLab(Signal* signal); void scanAttrinfoLab(Signal* signal, Uint32* dataPtr, Uint32 length); @@ -2512,7 +2519,7 @@ private: void nextScanConfScanLab(Signal* signal); void nextScanConfCopyLab(Signal* signal); void continueScanNextReqLab(Signal* signal); - bool keyinfoLab(Signal* signal, Uint32* dataPtr, Uint32 length); + void keyinfoLab(const Uint32 * src, const Uint32 * end); void copySendTupkeyReqLab(Signal* signal); void storedProcConfScanLab(Signal* signal); void storedProcConfCopyLab(Signal* signal); @@ -2568,7 +2575,6 @@ private: void accScanConfScanLab(Signal* signal); void accScanConfCopyLab(Signal* signal); void scanLockReleasedLab(Signal* signal); - void accScanInfoEnterLab(Signal* signal, Uint32* dataPtr, Uint32 length); void 
openSrFourthNextLab(Signal* signal); void closingInitLab(Signal* signal); void closeExecSrCompletedLab(Signal* signal); @@ -2583,6 +2589,8 @@ private: void initData(); void initRecords(); + Dbtup* c_tup; + Uint32 readPrimaryKeys(ScanRecord*, TcConnectionrec*, Uint32 * dst); // ---------------------------------------------------------------- // These are variables handling the records. For most records one // pointer to the array of structs, one pointer-struct, a file size diff --git a/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp b/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp index f9dd63e782d..04400f75255 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp @@ -273,8 +273,6 @@ Dblqh::Dblqh(const class Configuration & conf): addRecSignal(GSN_ACC_SCANREF, &Dblqh::execACC_SCANREF); addRecSignal(GSN_NEXT_SCANCONF, &Dblqh::execNEXT_SCANCONF); addRecSignal(GSN_NEXT_SCANREF, &Dblqh::execNEXT_SCANREF); - addRecSignal(GSN_ACC_SCAN_INFO, &Dblqh::execACC_SCAN_INFO); - addRecSignal(GSN_ACC_SCAN_INFO24, &Dblqh::execACC_SCAN_INFO24); addRecSignal(GSN_STORED_PROCCONF, &Dblqh::execSTORED_PROCCONF); addRecSignal(GSN_STORED_PROCREF, &Dblqh::execSTORED_PROCREF); addRecSignal(GSN_COPY_FRAGREQ, &Dblqh::execCOPY_FRAGREQ); diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index e4289715c38..87f2849aebc 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -55,6 +55,7 @@ #include <signaldata/AlterTab.hpp> #include <signaldata/LCP.hpp> +#include <KeyDescriptor.hpp> // Use DEBUG to print messages that should be // seen only when we debug the product @@ -167,9 +168,11 @@ void Dblqh::execTUP_COM_UNBLOCK(Signal* signal) /* ------- SEND SYSTEM ERROR ------- */ /* */ /* ------------------------------------------------------------------------- */ -void Dblqh::systemError(Signal* signal) +void Dblqh::systemError(Signal* signal, int line) { - progError(0, 
0); + signal->theData[0] = 2304; + execDUMP_STATE_ORD(signal); + progError(line, NDBD_EXIT_NDBREQUIRE); }//Dblqh::systemError() /* *************** */ @@ -420,7 +423,7 @@ void Dblqh::execCONTINUEB(Signal* signal) // Report information about transaction activity once per second. /* --------------------------------------------------------------------- */ if (signal->theData[1] == 0) { - signal->theData[0] = EventReport::OperationReportCounters; + signal->theData[0] = NDB_LE_OperationReportCounters; signal->theData[1] = c_Counters.operations; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); }//if @@ -517,6 +520,8 @@ void Dblqh::execSTTOR(Signal* signal) jam(); cstartPhase = tstartPhase; sttorStartphase1Lab(signal); + c_tup = (Dbtup*)globalData.getBlock(DBTUP); + ndbrequire(c_tup != 0); return; break; default: @@ -961,12 +966,16 @@ void Dblqh::execLQHFRAGREQ(Signal* signal) Uint8 tlh = req->lh3PageBits; Uint32 tnoOfAttr = req->noOfAttributes; Uint32 tnoOfNull = req->noOfNullAttributes; - Uint32 noOfAlloc = req->noOfPagesToPreAllocate; + Uint32 maxRowsLow = req->maxRowsLow; + Uint32 maxRowsHigh = req->maxRowsHigh; + Uint32 minRowsLow = req->minRowsLow; + Uint32 minRowsHigh = req->minRowsHigh; Uint32 tschemaVersion = req->schemaVersion; Uint32 ttupKeyLength = req->keyLength; Uint32 nextLcp = req->nextLCP; Uint32 noOfKeyAttr = req->noOfKeyAttr; Uint32 noOfNewAttr = req->noOfNewAttr; + Uint32 noOfCharsets = req->noOfCharsets; Uint32 checksumIndicator = req->checksumIndicator; Uint32 noOfAttributeGroups = req->noOfAttributeGroups; Uint32 gcpIndicator = req->GCPIndicator; @@ -1064,7 +1073,10 @@ void Dblqh::execLQHFRAGREQ(Signal* signal) addfragptr.p->m_senderAttrPtr = RNIL; addfragptr.p->noOfAttr = tnoOfAttr; addfragptr.p->noOfNull = tnoOfNull; - addfragptr.p->noOfAllocPages = noOfAlloc; + addfragptr.p->maxRowsLow = maxRowsLow; + addfragptr.p->maxRowsHigh = maxRowsHigh; + addfragptr.p->minRowsLow = minRowsLow; + addfragptr.p->minRowsHigh = minRowsHigh; 
addfragptr.p->tabId = tabptr.i; addfragptr.p->totalAttrReceived = 0; addfragptr.p->attrSentToTup = ZNIL;/* TO FIND PROGRAMMING ERRORS QUICKLY */ @@ -1074,6 +1086,7 @@ void Dblqh::execLQHFRAGREQ(Signal* signal) addfragptr.p->addfragErrorCode = 0; addfragptr.p->noOfKeyAttr = noOfKeyAttr; addfragptr.p->noOfNewAttr = noOfNewAttr; + addfragptr.p->noOfCharsets = noOfCharsets; addfragptr.p->checksumIndicator = checksumIndicator; addfragptr.p->noOfAttributeGroups = noOfAttributeGroups; addfragptr.p->GCPIndicator = gcpIndicator; @@ -1114,8 +1127,8 @@ void Dblqh::execLQHFRAGREQ(Signal* signal) if (DictTabInfo::isOrderedIndex(tableType)) { jam(); // NOTE: next 2 lines stolen from ACC - addfragptr.p->fragid1 = (0 << tlhstar) | fragId; - addfragptr.p->fragid2 = (1 << tlhstar) | fragId; + addfragptr.p->fragid1 = (fragId << 1) | 0; + addfragptr.p->fragid2 = (fragId << 1) | 1; addfragptr.p->addfragStatus = AddFragRecord::WAIT_TWO_TUP; sendAddFragReq(signal); return; @@ -1135,7 +1148,6 @@ void Dblqh::execACCFRAGCONF(Signal* signal) Uint32 fragId2 = signal->theData[3]; Uint32 accFragPtr1 = signal->theData[4]; Uint32 accFragPtr2 = signal->theData[5]; - Uint32 hashCheckBit = signal->theData[6]; ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord); ndbrequire(addfragptr.p->addfragStatus == AddFragRecord::ACC_ADDFRAG); @@ -1146,7 +1158,6 @@ void Dblqh::execACCFRAGCONF(Signal* signal) ptrCheckGuard(fragptr, cfragrecFileSize, fragrecord); fragptr.p->accFragptr[0] = accFragPtr1; fragptr.p->accFragptr[1] = accFragPtr2; - fragptr.p->hashCheckBit = hashCheckBit; addfragptr.p->addfragStatus = AddFragRecord::WAIT_TWO_TUP; sendAddFragReq(signal); @@ -1245,47 +1256,56 @@ Dblqh::sendAddFragReq(Signal* signal) ptrCheckGuard(fragptr, cfragrecFileSize, fragrecord); if (addfragptr.p->addfragStatus == AddFragRecord::WAIT_TWO_TUP || addfragptr.p->addfragStatus == AddFragRecord::WAIT_ONE_TUP) { + TupFragReq* const tupFragReq = (TupFragReq*)signal->getDataPtrSend(); if 
(DictTabInfo::isTable(addfragptr.p->tableType) || DictTabInfo::isHashIndex(addfragptr.p->tableType)) { jam(); - signal->theData[0] = addfragptr.i; - signal->theData[1] = cownref; - signal->theData[2] = 0; /* ADD TABLE */ - signal->theData[3] = addfragptr.p->tabId; - signal->theData[4] = addfragptr.p->noOfAttr; - signal->theData[5] = + tupFragReq->userPtr = addfragptr.i; + tupFragReq->userRef = cownref; + tupFragReq->reqInfo = 0; /* ADD TABLE */ + tupFragReq->tableId = addfragptr.p->tabId; + tupFragReq->noOfAttr = addfragptr.p->noOfAttr; + tupFragReq->fragId = addfragptr.p->addfragStatus == AddFragRecord::WAIT_TWO_TUP ? addfragptr.p->fragid1 : addfragptr.p->fragid2; - signal->theData[6] = (addfragptr.p->noOfAllocPages >> 1) + 1; - signal->theData[7] = addfragptr.p->noOfNull; - signal->theData[8] = addfragptr.p->schemaVer; - signal->theData[9] = addfragptr.p->noOfKeyAttr; - signal->theData[10] = addfragptr.p->noOfNewAttr; - signal->theData[11] = addfragptr.p->checksumIndicator; - signal->theData[12] = addfragptr.p->noOfAttributeGroups; - signal->theData[13] = addfragptr.p->GCPIndicator; + tupFragReq->maxRowsLow = addfragptr.p->maxRowsLow; + tupFragReq->maxRowsHigh = addfragptr.p->maxRowsHigh; + tupFragReq->minRowsLow = addfragptr.p->minRowsLow; + tupFragReq->minRowsHigh = addfragptr.p->minRowsHigh; + tupFragReq->noOfNullAttr = addfragptr.p->noOfNull; + tupFragReq->schemaVersion = addfragptr.p->schemaVer; + tupFragReq->noOfKeyAttr = addfragptr.p->noOfKeyAttr; + tupFragReq->noOfNewAttr = addfragptr.p->noOfNewAttr; + tupFragReq->noOfCharsets = addfragptr.p->noOfCharsets; + tupFragReq->checksumIndicator = addfragptr.p->checksumIndicator; + tupFragReq->noOfAttributeGroups = addfragptr.p->noOfAttributeGroups; + tupFragReq->globalCheckpointIdIndicator = addfragptr.p->GCPIndicator; sendSignal(fragptr.p->tupBlockref, GSN_TUPFRAGREQ, signal, TupFragReq::SignalLength, JBB); return; } if (DictTabInfo::isOrderedIndex(addfragptr.p->tableType)) { jam(); - signal->theData[0] = 
addfragptr.i; - signal->theData[1] = cownref; - signal->theData[2] = 0; /* ADD TABLE */ - signal->theData[3] = addfragptr.p->tabId; - signal->theData[4] = 1; /* ordered index: one array attr */ - signal->theData[5] = + tupFragReq->userPtr = addfragptr.i; + tupFragReq->userRef = cownref; + tupFragReq->reqInfo = 0; /* ADD TABLE */ + tupFragReq->tableId = addfragptr.p->tabId; + tupFragReq->noOfAttr = 1; /* ordered index: one array attr */ + tupFragReq->fragId = addfragptr.p->addfragStatus == AddFragRecord::WAIT_TWO_TUP ? addfragptr.p->fragid1 : addfragptr.p->fragid2; - signal->theData[6] = (addfragptr.p->noOfAllocPages >> 1) + 1; - signal->theData[7] = 0; /* ordered index: no nullable */ - signal->theData[8] = addfragptr.p->schemaVer; - signal->theData[9] = 1; /* ordered index: one key */ - signal->theData[10] = addfragptr.p->noOfNewAttr; - signal->theData[11] = addfragptr.p->checksumIndicator; - signal->theData[12] = addfragptr.p->noOfAttributeGroups; - signal->theData[13] = addfragptr.p->GCPIndicator; + tupFragReq->maxRowsLow = addfragptr.p->maxRowsLow; + tupFragReq->maxRowsHigh = addfragptr.p->maxRowsHigh; + tupFragReq->minRowsLow = addfragptr.p->minRowsLow; + tupFragReq->minRowsHigh = addfragptr.p->minRowsHigh; + tupFragReq->noOfNullAttr = 0; /* ordered index: no nullable */ + tupFragReq->schemaVersion = addfragptr.p->schemaVer; + tupFragReq->noOfKeyAttr = 1; /* ordered index: one key */ + tupFragReq->noOfNewAttr = addfragptr.p->noOfNewAttr; + tupFragReq->noOfCharsets = addfragptr.p->noOfCharsets; + tupFragReq->checksumIndicator = addfragptr.p->checksumIndicator; + tupFragReq->noOfAttributeGroups = addfragptr.p->noOfAttributeGroups; + tupFragReq->globalCheckpointIdIndicator = addfragptr.p->GCPIndicator; sendSignal(fragptr.p->tupBlockref, GSN_TUPFRAGREQ, signal, TupFragReq::SignalLength, JBB); return; @@ -1304,7 +1324,7 @@ Dblqh::sendAddFragReq(Signal* signal) tuxreq->noOfAttr = addfragptr.p->noOfAttr - 1; /* skip NDB$TNODE */ tuxreq->fragId = 
addfragptr.p->addfragStatus == AddFragRecord::WAIT_TWO_TUX - ? addfragptr.p->fragid1 : addfragptr.p->fragid2; + ? addfragptr.p->fragid1: addfragptr.p->fragid2; tuxreq->fragOff = addfragptr.p->lh3DistrBits; tuxreq->tableType = addfragptr.p->tableType; tuxreq->primaryTableId = addfragptr.p->primaryTableId; @@ -1604,28 +1624,35 @@ void Dblqh::abortAddFragOps(Signal* signal) { fragptr.i = addfragptr.p->fragmentPtr; ptrCheckGuard(fragptr, cfragrecFileSize, fragrecord); - signal->theData[0] = (Uint32)-1; if (addfragptr.p->tup1Connectptr != RNIL) { jam(); - signal->theData[1] = addfragptr.p->tup1Connectptr; + TupFragReq* const tupFragReq = (TupFragReq*)signal->getDataPtrSend(); + tupFragReq->userPtr = (Uint32)-1; + tupFragReq->userRef = addfragptr.p->tup1Connectptr; sendSignal(fragptr.p->tupBlockref, GSN_TUPFRAGREQ, signal, 2, JBB); addfragptr.p->tup1Connectptr = RNIL; } if (addfragptr.p->tup2Connectptr != RNIL) { jam(); - signal->theData[1] = addfragptr.p->tup2Connectptr; + TupFragReq* const tupFragReq = (TupFragReq*)signal->getDataPtrSend(); + tupFragReq->userPtr = (Uint32)-1; + tupFragReq->userRef = addfragptr.p->tup2Connectptr; sendSignal(fragptr.p->tupBlockref, GSN_TUPFRAGREQ, signal, 2, JBB); addfragptr.p->tup2Connectptr = RNIL; } if (addfragptr.p->tux1Connectptr != RNIL) { jam(); - signal->theData[1] = addfragptr.p->tux1Connectptr; + TuxFragReq* const tuxFragReq = (TuxFragReq*)signal->getDataPtrSend(); + tuxFragReq->userPtr = (Uint32)-1; + tuxFragReq->userRef = addfragptr.p->tux1Connectptr; sendSignal(fragptr.p->tuxBlockref, GSN_TUXFRAGREQ, signal, 2, JBB); addfragptr.p->tux1Connectptr = RNIL; } if (addfragptr.p->tux2Connectptr != RNIL) { jam(); - signal->theData[1] = addfragptr.p->tux2Connectptr; + TuxFragReq* const tuxFragReq = (TuxFragReq*)signal->getDataPtrSend(); + tuxFragReq->userPtr = (Uint32)-1; + tuxFragReq->userRef = addfragptr.p->tux2Connectptr; sendSignal(fragptr.p->tuxBlockref, GSN_TUXFRAGREQ, signal, 2, JBB); addfragptr.p->tux2Connectptr = RNIL; } @@ 
-2102,7 +2129,7 @@ void Dblqh::execTIME_SIGNAL(Signal* signal) if ((cCounterAccCommitBlocked > 0) || (cCounterTupCommitBlocked > 0)) { jam(); - signal->theData[0] = EventReport::UndoLogBlocked; + signal->theData[0] = NDB_LE_UndoLogBlocked; signal->theData[1] = cCounterTupCommitBlocked; signal->theData[2] = cCounterAccCommitBlocked; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); @@ -2646,12 +2673,20 @@ Dblqh::execREAD_PSUEDO_REQ(Signal* signal){ regTcPtr.i = signal->theData[0]; ptrCheckGuard(regTcPtr, ctcConnectrecFileSize, tcConnectionrec); - FragrecordPtr regFragptr; - regFragptr.i = regTcPtr.p->fragmentptr; - ptrCheckGuard(regFragptr, cfragrecFileSize, fragrecord); - - signal->theData[0] = regFragptr.p->accFragptr[regTcPtr.p->localFragptr]; - EXECUTE_DIRECT(DBACC, GSN_READ_PSUEDO_REQ, signal, 2); + if(signal->theData[1] != AttributeHeader::RANGE_NO) + { + jam(); + FragrecordPtr regFragptr; + regFragptr.i = regTcPtr.p->fragmentptr; + ptrCheckGuard(regFragptr, cfragrecFileSize, fragrecord); + + signal->theData[0] = regFragptr.p->accFragptr[regTcPtr.p->localFragptr]; + EXECUTE_DIRECT(DBACC, GSN_READ_PSUEDO_REQ, signal, 2); + } + else + { + signal->theData[0] = regTcPtr.p->m_scan_curr_range_no; + } } /* ************>> */ @@ -2666,11 +2701,11 @@ void Dblqh::execTUPKEYCONF(Signal* signal) jamEntry(); tcConnectptr.i = tcIndex; ptrCheckGuard(tcConnectptr, ttcConnectrecFileSize, regTcConnectionrec); - if (tcConnectptr.p->seqNoReplica == 0) // Primary replica - tcConnectptr.p->noFiredTriggers = tupKeyConf->noFiredTriggers; switch (tcConnectptr.p->transactionState) { case TcConnectionrec::WAIT_TUP: jam(); + if (tcConnectptr.p->seqNoReplica == 0) // Primary replica + tcConnectptr.p->noFiredTriggers = tupKeyConf->noFiredTriggers; tupkeyConfLab(signal); break; case TcConnectionrec::COPY_TUPKEY: @@ -3552,7 +3587,7 @@ void Dblqh::execLQHKEYREQ(Signal* signal) LQHKEY_error(signal, 6); return; }//if - regTcPtr->localFragptr = (regTcPtr->hashValue >> fragptr.p->hashCheckBit) 
& 1; + regTcPtr->localFragptr = regTcPtr->hashValue & 1; Uint8 TcopyType = fragptr.p->fragCopy; tfragDistKey = fragptr.p->fragDistributionKey; if (fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION) { @@ -5137,7 +5172,7 @@ void Dblqh::errorReport(Signal* signal, int place) jam(); break; }//switch - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Dblqh::errorReport() @@ -5198,7 +5233,7 @@ void Dblqh::execCOMMITREQ(Signal* signal) Uint32 transid2 = signal->theData[4]; Uint32 tcOprec = signal->theData[6]; if (ERROR_INSERTED(5004)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); } if (ERROR_INSERTED(5017)) { CLEAR_ERROR_INSERT_VALUE; @@ -5320,7 +5355,7 @@ void Dblqh::execCOMPLETEREQ(Signal* signal) Uint32 transid2 = signal->theData[3]; Uint32 tcOprec = signal->theData[5]; if (ERROR_INSERTED(5005)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); } if (ERROR_INSERTED(5018)) { CLEAR_ERROR_INSERT_VALUE; @@ -5958,7 +5993,7 @@ void Dblqh::execABORTREQ(Signal* signal) Uint32 transid2 = signal->theData[3]; Uint32 tcOprec = signal->theData[5]; if (ERROR_INSERTED(5006)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); } if (ERROR_INSERTED(5016)) { CLEAR_ERROR_INSERT_VALUE; @@ -6769,7 +6804,7 @@ void Dblqh::lqhTransNextLab(Signal* signal) /* ------------------------------------------------------------ * THIS IS AN ERROR THAT SHOULD NOT OCCUR. WE CRASH THE SYSTEM. * ------------------------------------------------------------ */ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if }//if @@ -6872,49 +6907,6 @@ Dblqh::scanMarkers(Signal* signal, * ALL TUPLES IN THE FRAGMENT. TUP PERFORMS THE NECESSARY SEARCH CONDITIONS * TO ENSURE THAT ONLY VALID TUPLES ARE RETURNED TO THE APPLICATION. 
* ------------------------------------------------------------------------- */ - -void Dblqh::execACC_SCAN_INFO(Signal* signal) -{ - jamEntry(); - scanptr.i = signal->theData[0]; - c_scanRecordPool.getPtr(scanptr); - Uint32 length = signal->theData[3]; - ndbrequire(length <= 4); - accScanInfoEnterLab(signal, &signal->theData[4], length); -}//Dblqh::execACC_SCAN_INFO() - - -void Dblqh::execACC_SCAN_INFO24(Signal* signal) -{ - jamEntry(); - scanptr.i = signal->theData[0]; - c_scanRecordPool.getPtr(scanptr); - Uint32 length = signal->theData[3]; - ndbrequire(length <= 20); - accScanInfoEnterLab(signal, &signal->theData[4], length); -}//Dblqh::execACC_SCAN_INFO24() - -void Dblqh::accScanInfoEnterLab(Signal* signal, - Uint32* dataPtr, - Uint32 length) -{ - ndbrequire(length != 0); - if (scanptr.p->scanState == ScanRecord::WAIT_SCAN_KEYINFO) { - jam(); - if (keyinfoLab(signal, dataPtr, length)) { - jam(); - nextScanConfLoopLab(signal); - }//if - } else { - ndbrequire(scanptr.p->scanState == ScanRecord::WAIT_COPY_KEYINFO); - jam(); - if (keyinfoLab(signal, dataPtr, length)) { - jam(); - copySendTupkeyReqLab(signal); - }//if - }//if -}//Dblqh::accScanInfoEnterLab() - /* *************** */ /* ACC_SCANCONF > */ /* *************** */ @@ -6989,7 +6981,7 @@ void Dblqh::execNEXT_SCANCONF(Signal* signal) void Dblqh::execNEXT_SCANREF(Signal* signal) { jamEntry(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Dblqh::execNEXT_SCANREF() @@ -7044,6 +7036,7 @@ void Dblqh::execSTORED_PROCREF(Signal* signal) switch (scanptr.p->scanState) { case ScanRecord::WAIT_STORED_PROC_SCAN: jam(); + scanptr.p->scanCompletedStatus = ZTRUE; scanptr.p->scanStoredProcId = signal->theData[2]; tcConnectptr.p->errorCode = errorCode; closeScanLab(signal); @@ -7249,10 +7242,7 @@ void Dblqh::continueScanReleaseAfterBlockedLab(Signal* signal) scanptr.p->scanReleaseCounter -1, false); signal->theData[2] = NextScanReq::ZSCAN_COMMIT; - if (! 
scanptr.p->rangeScan) - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); - else - sendSignal(tcConnectptr.p->tcTuxBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); }//Dblqh::continueScanReleaseAfterBlockedLab() /* ------------------------------------------------------------------------- @@ -7280,7 +7270,6 @@ void Dblqh::closeScanRequestLab(Signal* signal) jam(); tupScanCloseConfLab(signal); break; - case ScanRecord::WAIT_SCAN_KEYINFO: case ScanRecord::WAIT_NEXT_SCAN: jam(); /* ------------------------------------------------------------------- @@ -7344,6 +7333,7 @@ void Dblqh::closeScanRequestLab(Signal* signal) * WE ARE STILL WAITING FOR THE ATTRIBUTE INFORMATION THAT * OBVIOUSLY WILL NOT ARRIVE. WE CAN QUIT IMMEDIATELY HERE. * --------------------------------------------------------------------- */ + //XXX jonas this have to be wrong... releaseOprec(signal); if (tcConnectptr.p->abortState == TcConnectionrec::NEW_FROM_TC) { jam(); @@ -7410,15 +7400,15 @@ void Dblqh::scanLockReleasedLab(Signal* signal) scanptr.p->m_curr_batch_size_rows = 0; scanptr.p->m_curr_batch_size_bytes = 0; closeScanLab(signal); + } else if (scanptr.p->m_last_row && !scanptr.p->scanLockHold) { + jam(); + closeScanLab(signal); + return; } else if (scanptr.p->check_scan_batch_completed() && scanptr.p->scanLockHold != ZTRUE) { jam(); scanptr.p->scanState = ScanRecord::WAIT_SCAN_NEXTREQ; sendScanFragConf(signal, ZFALSE); - } else if (scanptr.p->m_last_row && !scanptr.p->scanLockHold) { - jam(); - closeScanLab(signal); - return; } else { jam(); /* @@ -7565,6 +7555,7 @@ void Dblqh::execSCAN_FRAGREQ(Signal* signal) const Uint32 scanLockMode = ScanFragReq::getLockMode(reqinfo); const Uint8 keyinfo = ScanFragReq::getKeyinfoFlag(reqinfo); const Uint8 rangeScan = ScanFragReq::getRangeScanFlag(reqinfo); + const Uint8 tupScan = ScanFragReq::getTupScanFlag(reqinfo); ptrCheckGuard(tabptr, ctabrecFileSize, 
tablerec); if(tabptr.p->tableStatus != Tablerec::TABLE_DEFINED){ @@ -7709,18 +7700,13 @@ void Dblqh::continueAfterReceivingAllAiLab(Signal* signal) req->fragmentNo = tcConnectptr.p->fragmentid; req->requestInfo = 0; AccScanReq::setLockMode(req->requestInfo, scanptr.p->scanLockMode); - AccScanReq::setKeyinfoFlag(req->requestInfo, scanptr.p->scanKeyinfoFlag); AccScanReq::setReadCommittedFlag(req->requestInfo, scanptr.p->readCommitted); + AccScanReq::setDescendingFlag(req->requestInfo, scanptr.p->descending); req->transId1 = tcConnectptr.p->transid[0]; req->transId2 = tcConnectptr.p->transid[1]; req->savePointId = tcConnectptr.p->savePointId; - // always use if-stmt to switch (instead of setting a "scan block ref") - if (! scanptr.p->rangeScan) - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_ACC_SCANREQ, signal, - AccScanReq::SignalLength, JBB); - else - sendSignal(tcConnectptr.p->tcTuxBlockref, GSN_ACC_SCANREQ, signal, - AccScanReq::SignalLength, JBB); + sendSignal(scanptr.p->scanBlockref, GSN_ACC_SCANREQ, signal, + AccScanReq::SignalLength, JBB); }//Dblqh::continueAfterReceivingAllAiLab() void Dblqh::scanAttrinfoLab(Signal* signal, Uint32* dataPtr, Uint32 length) @@ -7814,11 +7800,6 @@ void Dblqh::execSCAN_HBREP(Signal* signal) } } -void Dblqh::sendScanFragRefLateLab(Signal* signal) -{ -}//Dblqh::sendScanFragRefLateLab() - - void Dblqh::accScanConfScanLab(Signal* signal) { AccScanConf * const accScanConf = (AccScanConf *)&signal->theData[0]; @@ -7837,17 +7818,15 @@ void Dblqh::accScanConfScanLab(Signal* signal) return; }//if scanptr.p->scanAccPtr = accScanConf->accPtr; - Uint32 boundAiLength = tcConnectptr.p->primKeyLen - 4; if (scanptr.p->rangeScan) { jam(); - TuxBoundInfo* const req = (TuxBoundInfo*)signal->getDataPtrSend(); + TuxBoundInfo* req = (TuxBoundInfo*)signal->getDataPtrSend(); req->errorCode = RNIL; req->tuxScanPtrI = scanptr.p->scanAccPtr; - req->boundAiLength = boundAiLength; - if(boundAiLength > 0) - sendKeyinfoAcc(signal, TuxBoundInfo::SignalLength); 
- EXECUTE_DIRECT(DBTUX, GSN_TUX_BOUND_INFO, - signal, TuxBoundInfo::SignalLength + boundAiLength); + Uint32 len = req->boundAiLength = copy_bounds(req->data, tcConnectptr.p); + EXECUTE_DIRECT(DBTUX, GSN_TUX_BOUND_INFO, signal, + TuxBoundInfo::SignalLength + len); + jamEntry(); if (req->errorCode != 0) { jam(); @@ -7859,35 +7838,176 @@ void Dblqh::accScanConfScanLab(Signal* signal) tcConnectptr.p->errorCode = req->errorCode; } } - scanptr.p->scanState = ScanRecord::WAIT_STORED_PROC_SCAN; - signal->theData[0] = tcConnectptr.p->tupConnectrec; - signal->theData[1] = tcConnectptr.p->tableref; - signal->theData[2] = scanptr.p->scanSchemaVersion; - signal->theData[3] = ZSTORED_PROC_SCAN; - signal->theData[4] = scanptr.p->scanAiLength; - sendSignal(tcConnectptr.p->tcTupBlockref, - GSN_STORED_PROCREQ, signal, 5, JBB); - - signal->theData[0] = tcConnectptr.p->tupConnectrec; - AttrbufPtr regAttrinbufptr; - regAttrinbufptr.i = tcConnectptr.p->firstAttrinbuf; - while (regAttrinbufptr.i != RNIL) { - ptrCheckGuard(regAttrinbufptr, cattrinbufFileSize, attrbuf); + scanptr.p->scanState = ScanRecord::WAIT_STORED_PROC_SCAN; + if(scanptr.p->scanStoredProcId == RNIL) + { jam(); - Uint32 dataLen = regAttrinbufptr.p->attrbuf[ZINBUF_DATA_LEN]; - ndbrequire(dataLen != 0); - // first 3 words already set in STORED_PROCREQ - MEMCOPY_NO_WORDS(&signal->theData[3], - ®Attrinbufptr.p->attrbuf[0], - dataLen); + signal->theData[0] = tcConnectptr.p->tupConnectrec; + signal->theData[1] = tcConnectptr.p->tableref; + signal->theData[2] = scanptr.p->scanSchemaVersion; + signal->theData[3] = ZSTORED_PROC_SCAN; + + signal->theData[4] = scanptr.p->scanAiLength; sendSignal(tcConnectptr.p->tcTupBlockref, - GSN_ATTRINFO, signal, dataLen + 3, JBB); - regAttrinbufptr.i = regAttrinbufptr.p->attrbuf[ZINBUF_NEXT]; - }//while - releaseOprec(signal); + GSN_STORED_PROCREQ, signal, 5, JBB); + + signal->theData[0] = tcConnectptr.p->tupConnectrec; + AttrbufPtr regAttrinbufptr; + Uint32 firstAttr = regAttrinbufptr.i = 
tcConnectptr.p->firstAttrinbuf; + while (regAttrinbufptr.i != RNIL) { + ptrCheckGuard(regAttrinbufptr, cattrinbufFileSize, attrbuf); + jam(); + Uint32 dataLen = regAttrinbufptr.p->attrbuf[ZINBUF_DATA_LEN]; + ndbrequire(dataLen != 0); + // first 3 words already set in STORED_PROCREQ + MEMCOPY_NO_WORDS(&signal->theData[3], + ®Attrinbufptr.p->attrbuf[0], + dataLen); + sendSignal(tcConnectptr.p->tcTupBlockref, + GSN_ATTRINFO, signal, dataLen + 3, JBB); + regAttrinbufptr.i = regAttrinbufptr.p->attrbuf[ZINBUF_NEXT]; + c_no_attrinbuf_recs++; + }//while + + /** + * Release attr info + */ + if(firstAttr != RNIL) + { + regAttrinbufptr.p->attrbuf[ZINBUF_NEXT] = cfirstfreeAttrinbuf; + cfirstfreeAttrinbuf = firstAttr; + tcConnectptr.p->firstAttrinbuf = tcConnectptr.p->lastAttrinbuf = RNIL; + } + } + else + { + jam(); + storedProcConfScanLab(signal); + } }//Dblqh::accScanConfScanLab() +#define print_buf(s,idx,len) {\ + printf(s); Uint32 t2=len; DatabufPtr t3; t3.i = idx; \ + while(t3.i != RNIL && t2-- > 0){\ + ptrCheckGuard(t3, cdatabufFileSize, databuf);\ + printf("%d ", t3.i); t3.i= t3.p->nextDatabuf;\ + } printf("\n"); } + +Uint32 +Dblqh::copy_bounds(Uint32 * dst, TcConnectionrec* tcPtrP) +{ + /** + * copy_bounds handles multiple bounds by + * in the 16 upper bits of the first words (used to specify bound type) + * setting the length of this specific bound + * + */ + + DatabufPtr regDatabufptr; + Uint32 left = 4 - tcPtrP->m_offset_current_keybuf; // left in buf + Uint32 totalLen = tcPtrP->primKeyLen - 4; + regDatabufptr.i = tcPtrP->firstTupkeybuf; + + ndbassert(tcPtrP->primKeyLen >= 4); + ndbassert(tcPtrP->m_offset_current_keybuf < 4); + ndbassert(!(totalLen == 0 && regDatabufptr.i != RNIL)); + ndbassert(!(totalLen != 0 && regDatabufptr.i == RNIL)); + + if(totalLen) + { + ptrCheckGuard(regDatabufptr, cdatabufFileSize, databuf); + Uint32 sig0 = regDatabufptr.p->data[0]; + Uint32 sig1 = regDatabufptr.p->data[1]; + Uint32 sig2 = regDatabufptr.p->data[2]; + Uint32 sig3 = 
regDatabufptr.p->data[3]; + + switch(left){ + case 4: + * dst++ = sig0; + case 3: + * dst++ = sig1; + case 2: + * dst++ = sig2; + case 1: + * dst++ = sig3; + } + + Uint32 first = (* (dst - left)); // First word in range + + // Length of this range + Uint8 offset; + const Uint32 len = (first >> 16) ? (first >> 16) : totalLen; + tcPtrP->m_scan_curr_range_no = (first & 0xFFF0) >> 4; + (* (dst - left)) = (first & 0xF); // Remove length & range no + + if(len < left) + { + offset = len; + } + else + { + Databuf * lastP; + left = (len - left); + regDatabufptr.i = regDatabufptr.p->nextDatabuf; + + while(left >= 4) + { + left -= 4; + lastP = regDatabufptr.p; + ptrCheckGuard(regDatabufptr, cdatabufFileSize, databuf); + sig0 = regDatabufptr.p->data[0]; + sig1 = regDatabufptr.p->data[1]; + sig2 = regDatabufptr.p->data[2]; + sig3 = regDatabufptr.p->data[3]; + regDatabufptr.i = regDatabufptr.p->nextDatabuf; + + * dst++ = sig0; + * dst++ = sig1; + * dst++ = sig2; + * dst++ = sig3; + } + + if(left > 0) + { + lastP = regDatabufptr.p; + ptrCheckGuard(regDatabufptr, cdatabufFileSize, databuf); + sig0 = regDatabufptr.p->data[0]; + sig1 = regDatabufptr.p->data[1]; + sig2 = regDatabufptr.p->data[2]; + sig3 = regDatabufptr.p->data[3]; + * dst++ = sig0; + * dst++ = sig1; + * dst++ = sig2; + * dst++ = sig3; + } + else + { + lastP = regDatabufptr.p; + } + offset = left & 3; + lastP->nextDatabuf = cfirstfreeDatabuf; + cfirstfreeDatabuf = tcPtrP->firstTupkeybuf; + ndbassert(cfirstfreeDatabuf != RNIL); + } + + if(len == totalLen && regDatabufptr.i != RNIL) + { + regDatabufptr.p->nextDatabuf = cfirstfreeDatabuf; + cfirstfreeDatabuf = regDatabufptr.i; + tcPtrP->lastTupkeybuf = regDatabufptr.i = RNIL; + ndbassert(cfirstfreeDatabuf != RNIL); + } + + tcPtrP->m_offset_current_keybuf = offset; + tcPtrP->firstTupkeybuf = regDatabufptr.i; + tcPtrP->primKeyLen = 4 + totalLen - len; + + return len; + } + return totalLen; +} + /* ------------------------------------------------------------------------- * 
ENTER STORED_PROCCONF WITH * TC_CONNECTPTR, @@ -7938,14 +8058,10 @@ void Dblqh::continueFirstScanAfterBlockedLab(Signal* signal) scanptr.i = tcConnectptr.p->tcScanRec; c_scanRecordPool.getPtr(scanptr); scanptr.p->scanState = ScanRecord::WAIT_NEXT_SCAN; - init_acc_ptr_list(scanptr.p); signal->theData[0] = scanptr.p->scanAccPtr; signal->theData[1] = RNIL; signal->theData[2] = NextScanReq::ZSCAN_NEXT; - if (! scanptr.p->rangeScan) - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); - else - sendSignal(tcConnectptr.p->tcTuxBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); return; }//Dblqh::continueFirstScanAfterBlockedLab() @@ -8015,10 +8131,8 @@ void Dblqh::continueAfterCheckLcpStopBlocked(Signal* signal) c_scanRecordPool.getPtr(scanptr); signal->theData[0] = scanptr.p->scanAccPtr; signal->theData[1] = AccCheckScan::ZNOT_CHECK_LCP_STOP; - if (! scanptr.p->rangeScan) - EXECUTE_DIRECT(DBACC, GSN_ACC_CHECK_SCAN, signal, 2); - else - EXECUTE_DIRECT(DBTUX, GSN_ACC_CHECK_SCAN, signal, 2); + EXECUTE_DIRECT(refToBlock(scanptr.p->scanBlockref), GSN_ACC_CHECK_SCAN, + signal, 2); }//Dblqh::continueAfterCheckLcpStopBlocked() /* ------------------------------------------------------------------------- @@ -8066,7 +8180,10 @@ void Dblqh::nextScanConfScanLab(Signal* signal) if (scanptr.p->m_curr_batch_size_rows > 0) { jam(); - scanptr.p->scanCompletedStatus = ZTRUE; + + if((tcConnectptr.p->primKeyLen - 4) == 0) + scanptr.p->scanCompletedStatus = ZTRUE; + scanptr.p->scanState = ScanRecord::WAIT_SCAN_NEXTREQ; sendScanFragConf(signal, ZFALSE); return; @@ -8105,12 +8222,8 @@ void Dblqh::nextScanConfScanLab(Signal* signal) signal->theData[0] = scanptr.p->scanAccPtr; signal->theData[1] = AccCheckScan::ZCHECK_LCP_STOP; - if (! 
scanptr.p->rangeScan) - sendSignal(tcConnectptr.p->tcAccBlockref, - GSN_ACC_CHECK_SCAN, signal, 2, JBB); - else - sendSignal(tcConnectptr.p->tcTuxBlockref, - GSN_ACC_CHECK_SCAN, signal, 2, JBB); + sendSignal(scanptr.p->scanBlockref, + GSN_ACC_CHECK_SCAN, signal, 2, JBB); return; }//if jam(); @@ -8121,22 +8234,6 @@ void Dblqh::nextScanConfScanLab(Signal* signal) scanptr.p->scanLocalref[0] = nextScanConf->localKey[0]; scanptr.p->scanLocalref[1] = nextScanConf->localKey[1]; scanptr.p->scanLocalFragid = nextScanConf->fragId; - if (scanptr.p->scanKeyinfoFlag) { - jam(); - tcConnectptr.p->primKeyLen = nextScanConf->keyLength; - seizeTupkeybuf(signal); - databufptr.p->data[0] = nextScanConf->key[0]; - databufptr.p->data[1] = nextScanConf->key[1]; - databufptr.p->data[2] = nextScanConf->key[2]; - databufptr.p->data[3] = nextScanConf->key[3]; - if (nextScanConf->keyLength > 4) { - jam(); - tcConnectptr.p->save1 = 4; - scanptr.p->scanState = ScanRecord::WAIT_SCAN_KEYINFO; - return; - }//if - }//if - jam(); nextScanConfLoopLab(signal); }//Dblqh::nextScanConfScanLab() @@ -8148,7 +8245,6 @@ void Dblqh::nextScanConfLoopLab(Signal* signal) if (scanptr.p->scanCompletedStatus == ZTRUE) { jam(); releaseActiveFrag(signal); - releaseOprec(signal); if ((scanptr.p->scanLockHold == ZTRUE) && (scanptr.p->m_curr_batch_size_rows > 0)) { jam(); @@ -8170,13 +8266,7 @@ void Dblqh::nextScanConfLoopLab(Signal* signal) ptrCheckGuard(fragptr, cfragrecFileSize, fragrecord); if (! 
scanptr.p->rangeScan) { tableRef = tcConnectptr.p->tableref; - if (fragptr.p->fragId == scanptr.p->scanLocalFragid) { - jam(); - tupFragPtr = fragptr.p->tupFragptr[0]; - } else { - jam(); - tupFragPtr = fragptr.p->tupFragptr[1]; - }//if + tupFragPtr = fragptr.p->tupFragptr[scanptr.p->scanLocalFragid & 1]; } else { jam(); // for ordered index use primary table @@ -8184,13 +8274,7 @@ void Dblqh::nextScanConfLoopLab(Signal* signal) tFragPtr.i = fragptr.p->tableFragptr; ptrCheckGuard(tFragPtr, cfragrecFileSize, fragrecord); tableRef = tFragPtr.p->tabRef; - if (tFragPtr.p->fragId == scanptr.p->scanLocalFragid) { - jam(); - tupFragPtr = tFragPtr.p->tupFragptr[0]; - } else { - jam(); - tupFragPtr = tFragPtr.p->tupFragptr[1]; - }//if + tupFragPtr = tFragPtr.p->tupFragptr[scanptr.p->scanLocalFragid & 1]; } { jam(); @@ -8225,33 +8309,46 @@ void Dblqh::nextScanConfLoopLab(Signal* signal) * ------------------------------------------------------------------------- * PRECONDITION: SCAN_STATE = WAIT_SCAN_KEYINFO * ------------------------------------------------------------------------- */ -bool Dblqh::keyinfoLab(Signal* signal, Uint32* dataPtr, Uint32 length) +void +Dblqh::keyinfoLab(const Uint32 * src, const Uint32 * end) { - tcConnectptr.i = scanptr.p->scanTcrec; - ptrCheckGuard(tcConnectptr, ctcConnectrecFileSize, tcConnectionrec); - Uint32 index = 0; do { jam(); - seizeTupkeybuf(signal); - databufptr.p->data[0] = dataPtr[index]; - databufptr.p->data[1] = dataPtr[index + 1]; - databufptr.p->data[2] = dataPtr[index + 2]; - databufptr.p->data[3] = dataPtr[index + 3]; - index += 4; - tcConnectptr.p->save1 = tcConnectptr.p->save1 + 4; - if (tcConnectptr.p->save1 >= tcConnectptr.p->primKeyLen) { - jam(); - return true; - }//if - if (index >= length) { - jam(); - return false; - }//if - } while (index < 20); - ndbrequire(false); - return false; + seizeTupkeybuf(0); + databufptr.p->data[0] = * src ++; + databufptr.p->data[1] = * src ++; + databufptr.p->data[2] = * src ++; + 
databufptr.p->data[3] = * src ++; + } while (src < end); }//Dblqh::keyinfoLab() +Uint32 +Dblqh::readPrimaryKeys(ScanRecord *scanP, TcConnectionrec *tcConP, Uint32 *dst) +{ + Uint32 tableId = tcConP->tableref; + Uint32 fragId = scanP->scanLocalFragid; + Uint32 fragPageId = scanP->scanLocalref[0]; + Uint32 pageIndex = scanP->scanLocalref[1]; + + if(scanP->rangeScan) + { + jam(); + // for ordered index use primary table + FragrecordPtr tFragPtr; + tFragPtr.i = fragptr.p->tableFragptr; + ptrCheckGuard(tFragPtr, cfragrecFileSize, fragrecord); + tableId = tFragPtr.p->tabRef; + } + + int ret = c_tup->accReadPk(tableId, fragId, fragPageId, pageIndex, dst, false); + if(0) + ndbout_c("readPrimaryKeys(table: %d fragment: %d [ %d %d ] -> %d", + tableId, fragId, fragPageId, pageIndex, ret); + ndbassert(ret > 0); + + return ret; +} + /* ------------------------------------------------------------------------- * ENTER TUPKEYCONF * ------------------------------------------------------------------------- @@ -8271,7 +8368,6 @@ void Dblqh::scanTupkeyConfLab(Signal* signal) /* --------------------------------------------------------------------- * STOP THE SCAN PROCESS IF THIS HAS BEEN REQUESTED. 
* --------------------------------------------------------------------- */ - releaseOprec(signal); if ((scanptr.p->scanLockHold == ZTRUE) && (scanptr.p->m_curr_batch_size_rows > 0)) { jam(); @@ -8285,10 +8381,8 @@ void Dblqh::scanTupkeyConfLab(Signal* signal) }//if if (scanptr.p->scanKeyinfoFlag) { jam(); - sendKeyinfo20(signal, scanptr.p, tcConnectptr.p); - releaseOprec(signal); - - tdata4 += tcConnectptr.p->primKeyLen;// Inform API about keyinfo len aswell + // Inform API about keyinfo len aswell + tdata4 += sendKeyinfo20(signal, scanptr.p, tcConnectptr.p); }//if ndbrequire(scanptr.p->m_curr_batch_size_rows < MAX_PARALLEL_OP_PER_SCAN); scanptr.p->m_curr_batch_size_bytes+= tdata4; @@ -8372,10 +8466,7 @@ void Dblqh::continueScanAfterBlockedLab(Signal* signal) signal->theData[0] = scanptr.p->scanAccPtr; signal->theData[1] = accOpPtr; signal->theData[2] = scanptr.p->scanFlag; - if (! scanptr.p->rangeScan) - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_NEXT_SCANREQ, signal, 3,JBB); - else - sendSignal(tcConnectptr.p->tcTuxBlockref, GSN_NEXT_SCANREQ, signal, 3,JBB); + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); }//Dblqh::continueScanAfterBlockedLab() /* ------------------------------------------------------------------------- @@ -8390,7 +8481,6 @@ void Dblqh::scanTupkeyRefLab(Signal* signal) tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED; scanptr.i = tcConnectptr.p->tcScanRec; releaseActiveFrag(signal); - releaseOprec(signal); c_scanRecordPool.getPtr(scanptr); if (scanptr.p->scanCompletedStatus == ZTRUE) { /* --------------------------------------------------------------------- @@ -8498,10 +8588,7 @@ void Dblqh::continueCloseScanAfterBlockedLab(Signal* signal) signal->theData[0] = scanptr.p->scanAccPtr; signal->theData[1] = RNIL; signal->theData[2] = NextScanReq::ZSCAN_CLOSE; - if (! 
scanptr.p->rangeScan) - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); - else - sendSignal(tcConnectptr.p->tcTuxBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); }//Dblqh::continueCloseScanAfterBlockedLab() /* ------------------------------------------------------------------------- @@ -8512,8 +8599,18 @@ void Dblqh::continueCloseScanAfterBlockedLab(Signal* signal) void Dblqh::accScanCloseConfLab(Signal* signal) { tcConnectptr.i = scanptr.p->scanTcrec; - scanptr.p->scanState = ScanRecord::WAIT_DELETE_STORED_PROC_ID_SCAN; ptrCheckGuard(tcConnectptr, ctcConnectrecFileSize, tcConnectionrec); + + if((tcConnectptr.p->primKeyLen - 4) > 0 && + scanptr.p->scanCompletedStatus != ZTRUE) + { + jam(); + releaseActiveFrag(signal); + continueAfterReceivingAllAiLab(signal); + return; + } + + scanptr.p->scanState = ScanRecord::WAIT_DELETE_STORED_PROC_ID_SCAN; signal->theData[0] = tcConnectptr.p->tupConnectrec; signal->theData[1] = tcConnectptr.p->tableref; signal->theData[2] = scanptr.p->scanSchemaVersion; @@ -8575,7 +8672,9 @@ Uint32 Dblqh::initScanrec(const ScanFragReq* scanFragReq) const Uint32 scanLockHold = ScanFragReq::getHoldLockFlag(reqinfo); const Uint32 keyinfo = ScanFragReq::getKeyinfoFlag(reqinfo); const Uint32 readCommitted = ScanFragReq::getReadCommittedFlag(reqinfo); - const Uint32 idx = ScanFragReq::getRangeScanFlag(reqinfo); + const Uint32 rangeScan = ScanFragReq::getRangeScanFlag(reqinfo); + const Uint32 descending = ScanFragReq::getDescendingFlag(reqinfo); + const Uint32 tupScan = ScanFragReq::getTupScanFlag(reqinfo); const Uint32 attrLen = ScanFragReq::getAttrLen(reqinfo); const Uint32 scanPrio = ScanFragReq::getScanPrio(reqinfo); @@ -8593,10 +8692,19 @@ Uint32 Dblqh::initScanrec(const ScanFragReq* scanFragReq) scanptr.p->m_max_batch_size_rows = max_rows; scanptr.p->m_max_batch_size_bytes = max_bytes; + if (! rangeScan && ! 
tupScan) + scanptr.p->scanBlockref = tcConnectptr.p->tcAccBlockref; + else if (! tupScan) + scanptr.p->scanBlockref = tcConnectptr.p->tcTuxBlockref; + else + scanptr.p->scanBlockref = tcConnectptr.p->tcTupBlockref; + scanptr.p->scanErrorCounter = 0; scanptr.p->scanLockMode = scanLockMode; scanptr.p->readCommitted = readCommitted; - scanptr.p->rangeScan = idx; + scanptr.p->rangeScan = rangeScan; + scanptr.p->descending = descending; + scanptr.p->tupScan = tupScan; scanptr.p->scanState = ScanRecord::SCAN_FREE; scanptr.p->scanFlag = ZFALSE; scanptr.p->scanLocalref[0] = 0; @@ -8606,6 +8714,7 @@ Uint32 Dblqh::initScanrec(const ScanFragReq* scanFragReq) scanptr.p->scanNumber = ~0; scanptr.p->scanApiOpPtr = scanFragReq->clientOpPtr; scanptr.p->m_last_row = 0; + scanptr.p->scanStoredProcId = RNIL; if (max_rows == 0 || (max_bytes > 0 && max_rows > max_bytes)){ jam(); @@ -8627,8 +8736,8 @@ Uint32 Dblqh::initScanrec(const ScanFragReq* scanFragReq) * !idx uses 1 - (MAX_PARALLEL_SCANS_PER_FRAG - 1) = 1-11 * idx uses from MAX_PARALLEL_SCANS_PER_FRAG - MAX = 12-42) */ - Uint32 start = (idx ? MAX_PARALLEL_SCANS_PER_FRAG : 1 ); - Uint32 stop = (idx ? MAX_PARALLEL_INDEX_SCANS_PER_FRAG : MAX_PARALLEL_SCANS_PER_FRAG - 1); + Uint32 start = (rangeScan || tupScan ? MAX_PARALLEL_SCANS_PER_FRAG : 1 ); + Uint32 stop = (rangeScan || tupScan ? 
MAX_PARALLEL_INDEX_SCANS_PER_FRAG : MAX_PARALLEL_SCANS_PER_FRAG - 1); stop += start; Uint32 free = tFragPtr.p->m_scanNumberMask.find(start); @@ -8664,7 +8773,8 @@ Uint32 Dblqh::initScanrec(const ScanFragReq* scanFragReq) #ifdef TRACE_SCAN_TAKEOVER ndbout_c("adding (%d %d) table: %d fragId: %d frag.i: %d tableFragptr: %d", scanptr.p->scanNumber, scanptr.p->fragPtrI, - tabptr.i, scanFragReq->fragmentNo, fragptr.i, fragptr.p->tableFragptr); + tabptr.i, scanFragReq->fragmentNoKeyLen & 0xFFFF, + fragptr.i, fragptr.p->tableFragptr); #endif c_scanTakeOverHash.add(scanptr); } @@ -8700,6 +8810,8 @@ void Dblqh::initScanTc(Signal* signal, tcConnectptr.p->operation = ZREAD; tcConnectptr.p->listState = TcConnectionrec::NOT_IN_LIST; tcConnectptr.p->commitAckMarker = RNIL; + tcConnectptr.p->m_offset_current_keybuf = 0; + tcConnectptr.p->m_scan_curr_range_no = 0; tabptr.p->usageCount++; }//Dblqh::initScanTc() @@ -8814,23 +8926,17 @@ void Dblqh::releaseScanrec(Signal* signal) * ------- SEND KEYINFO20 TO API ------- * * ------------------------------------------------------------------------ */ -void Dblqh::sendKeyinfo20(Signal* signal, - ScanRecord * scanP, - TcConnectionrec * tcConP) +Uint32 Dblqh::sendKeyinfo20(Signal* signal, + ScanRecord * scanP, + TcConnectionrec * tcConP) { ndbrequire(scanP->m_curr_batch_size_rows < MAX_PARALLEL_OP_PER_SCAN); KeyInfo20 * keyInfo = (KeyInfo20 *)&signal->theData[0]; - DatabufPtr TdataBuf; - TdataBuf.i = tcConP->firstTupkeybuf; - Uint32 keyLen = tcConP->primKeyLen; - const Uint32 dataBufSz = cdatabufFileSize; - /** * Note that this code requires signal->theData to be big enough for * a entire key */ - ndbrequire(keyLen * 4 <= sizeof(signal->theData)); const BlockReference ref = scanP->scanApiBlockref; const Uint32 scanOp = scanP->m_curr_batch_size_rows; const Uint32 nodeId = refToNode(ref); @@ -8843,24 +8949,12 @@ void Dblqh::sendKeyinfo20(Signal* signal, Uint32 * dst = keyInfo->keyData; dst += nodeId == getOwnNodeId() ? 
0 : KeyInfo20::DataLength; - /** - * Copy keydata from data buffer into signal - * - */ - for(Uint32 i = 0; i < keyLen; i += 4){ - ptrCheckGuard(TdataBuf, dataBufSz, databuf); - * dst++ = TdataBuf.p->data[0]; - * dst++ = TdataBuf.p->data[1]; - * dst++ = TdataBuf.p->data[2]; - * dst++ = TdataBuf.p->data[3]; - TdataBuf.i = TdataBuf.p->nextDatabuf; - } - + Uint32 keyLen = readPrimaryKeys(scanP, tcConP, dst); + Uint32 fragId = tcConP->fragmentid; keyInfo->clientOpPtr = scanP->scanApiOpPtr; keyInfo->keyLen = keyLen; - keyInfo->scanInfo_Node = KeyInfo20::setScanInfo(scanOp, - scanP->scanNumber)+ - (getOwnNodeId() << 20); + keyInfo->scanInfo_Node = + KeyInfo20::setScanInfo(scanOp, scanP->scanNumber) + (fragId << 20); keyInfo->transId1 = tcConP->transid[0]; keyInfo->transId2 = tcConP->transid[1]; @@ -8883,7 +8977,7 @@ void Dblqh::sendKeyinfo20(Signal* signal, MEMCOPY_NO_WORDS(keyInfo->keyData, src, keyLen); sendSignal(ref, GSN_KEYINFO20, signal, KeyInfo20::HeaderLength+keyLen, JBB); - return; + return keyLen; } LinearSectionPtr ptr[3]; @@ -8891,13 +8985,13 @@ void Dblqh::sendKeyinfo20(Signal* signal, ptr[0].sz = keyLen; sendSignal(ref, GSN_KEYINFO20, signal, KeyInfo20::HeaderLength, JBB, ptr, 1); - return; + return keyLen; } EXECUTE_DIRECT(refToBlock(ref), GSN_KEYINFO20, signal, KeyInfo20::HeaderLength + keyLen); jamEntry(); - return; + return keyLen; } /** @@ -8923,7 +9017,7 @@ void Dblqh::sendKeyinfo20(Signal* signal, keyInfo->keyData[keyLen] = ref; sendSignal(routeBlockref, GSN_KEYINFO20_R, signal, KeyInfo20::HeaderLength+keyLen+1, JBB); - return; + return keyLen; } keyInfo->keyData[0] = ref; @@ -8932,7 +9026,7 @@ void Dblqh::sendKeyinfo20(Signal* signal, ptr[0].sz = keyLen; sendSignal(routeBlockref, GSN_KEYINFO20_R, signal, KeyInfo20::HeaderLength+1, JBB, ptr, 1); - return; + return keyLen; } /* ------------------------------------------------------------------------ @@ -8981,44 +9075,17 @@ void Dblqh::sendScanFragConf(Signal* signal, Uint32 scanCompleted) /* FRAGMENT 
TO A NEW REPLICA OF THE FRAGMENT. IT DOES ALSO SHUT DOWN ALL */ /* CONNECTIONS TO THE FAILED NODE. */ /*---------------------------------------------------------------------------*/ -void Dblqh::calculateHash(Signal* signal) -{ - DatabufPtr locDatabufptr; - UintR Ti; - UintR Tdata0; - UintR Tdata1; - UintR Tdata2; - UintR Tdata3; - UintR* Tdata32; - Uint64 Tdata[512]; - - Tdata32 = (UintR*)&Tdata[0]; - - Tdata0 = tcConnectptr.p->tupkeyData[0]; - Tdata1 = tcConnectptr.p->tupkeyData[1]; - Tdata2 = tcConnectptr.p->tupkeyData[2]; - Tdata3 = tcConnectptr.p->tupkeyData[3]; - Tdata32[0] = Tdata0; - Tdata32[1] = Tdata1; - Tdata32[2] = Tdata2; - Tdata32[3] = Tdata3; - locDatabufptr.i = tcConnectptr.p->firstTupkeybuf; - Ti = 4; - while (locDatabufptr.i != RNIL) { - ptrCheckGuard(locDatabufptr, cdatabufFileSize, databuf); - Tdata0 = locDatabufptr.p->data[0]; - Tdata1 = locDatabufptr.p->data[1]; - Tdata2 = locDatabufptr.p->data[2]; - Tdata3 = locDatabufptr.p->data[3]; - Tdata32[Ti ] = Tdata0; - Tdata32[Ti + 1] = Tdata1; - Tdata32[Ti + 2] = Tdata2; - Tdata32[Ti + 3] = Tdata3; - locDatabufptr.i = locDatabufptr.p->nextDatabuf; - Ti += 4; - }//while - tcConnectptr.p->hashValue = - md5_hash((Uint64*)&Tdata32[0], (UintR)tcConnectptr.p->primKeyLen); +Uint32 +Dblqh::calculateHash(Uint32 tableId, const Uint32* src) +{ + jam(); + Uint64 Tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1]; + Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX]; + Uint32 keyLen = xfrm_key(tableId, src, (Uint32*)Tmp, sizeof(Tmp) >> 2, + keyPartLen); + ndbrequire(keyLen); + + return md5_hash(Tmp, keyLen); }//Dblqh::calculateHash() /* *************************************** */ @@ -9070,6 +9137,7 @@ void Dblqh::execCOPY_FRAGREQ(Signal* signal) /* ------------------------------------------------------------------------- */ scanptr.p->m_max_batch_size_rows = 0; scanptr.p->rangeScan = 0; + scanptr.p->tupScan = 0; seizeTcrec(); /** @@ -9088,6 +9156,7 @@ void Dblqh::execCOPY_FRAGREQ(Signal* signal) 
scanptr.p->scanKeyinfoFlag = 0; // Don't put into hash scanptr.p->fragPtrI = fragptr.i; fragptr.p->m_scanNumberMask.clear(NR_ScanNo); + scanptr.p->scanBlockref = DBACC_REF; initScanTc(signal, 0, @@ -9108,7 +9177,6 @@ void Dblqh::execCOPY_FRAGREQ(Signal* signal) req->fragmentNo = fragId; req->requestInfo = 0; AccScanReq::setLockMode(req->requestInfo, 0); - AccScanReq::setKeyinfoFlag(req->requestInfo, 1); AccScanReq::setReadCommittedFlag(req->requestInfo, 0); req->transId1 = tcConnectptr.p->transid[0]; req->transId2 = tcConnectptr.p->transid[1]; @@ -9192,7 +9260,7 @@ void Dblqh::storedProcConfCopyLab(Signal* signal) jam(); default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -9270,12 +9338,6 @@ void Dblqh::nextScanConfCopyLab(Signal* signal) set_acc_ptr_in_scan_record(scanptr.p, 0, nextScanConf->accOperationPtr); initCopyTc(signal); - if (tcConnectptr.p->primKeyLen > 4) { - jam(); - tcConnectptr.p->save1 = 4; - scanptr.p->scanState = ScanRecord::WAIT_COPY_KEYINFO; - return; - }//if copySendTupkeyReqLab(signal); return; }//Dblqh::nextScanConfCopyLab() @@ -9291,13 +9353,7 @@ void Dblqh::copySendTupkeyReqLab(Signal* signal) scanptr.p->scanState = ScanRecord::WAIT_TUPKEY_COPY; fragptr.i = tcConnectptr.p->fragmentptr; ptrCheckGuard(fragptr, cfragrecFileSize, fragrecord); - if (fragptr.p->fragId == scanptr.p->scanLocalFragid) { - jam(); - tupFragPtr = fragptr.p->tupFragptr[0]; - } else { - jam(); - tupFragPtr = fragptr.p->tupFragptr[1]; - }//if + tupFragPtr = fragptr.p->tupFragptr[scanptr.p->scanLocalFragid & 1]; { TupKeyReq * const tupKeyReq = (TupKeyReq *)signal->getDataPtrSend(); @@ -9372,9 +9428,10 @@ void Dblqh::copyTupkeyConfLab(Signal* signal) const TupKeyConf * const tupKeyConf = (TupKeyConf *)signal->getDataPtr(); UintR readLength = tupKeyConf->readLength; - + Uint32 tableId = tcConnectptr.p->tableref; scanptr.i = tcConnectptr.p->tcScanRec; c_scanRecordPool.getPtr(scanptr); + ScanRecord* scanP = scanptr.p; 
releaseActiveFrag(signal); if (tcConnectptr.p->errorCode != 0) { jam(); @@ -9389,9 +9446,30 @@ void Dblqh::copyTupkeyConfLab(Signal* signal) closeCopyLab(signal); return; }//if + TcConnectionrec * tcConP = tcConnectptr.p; tcConnectptr.p->totSendlenAi = readLength; tcConnectptr.p->connectState = TcConnectionrec::COPY_CONNECTED; - calculateHash(signal); + + // Read primary keys (used to get here via scan keyinfo) + Uint32* tmp = signal->getDataPtrSend()+24; + Uint32 len= tcConnectptr.p->primKeyLen = readPrimaryKeys(scanP, tcConP, tmp); + + // Calculate hash (no need to linearies key) + if (g_key_descriptor_pool.getPtr(tableId)->hasCharAttr) + { + tcConnectptr.p->hashValue = calculateHash(tableId, tmp); + } + else + { + tcConnectptr.p->hashValue = md5_hash((Uint64*)tmp, len); + } + + // Move into databuffer to make packLqhkeyreqLab happy + memcpy(tcConP->tupkeyData, tmp, 4*4); + if(len > 4) + keyinfoLab(tmp+4, tmp + len); + LqhKeyReq::setKeyLen(tcConP->reqinfo, len); + /*---------------------------------------------------------------------------*/ // To avoid using up to many operation records in ACC we will increase the // constant to ensure that we never send more than 40 records at a time. @@ -9402,7 +9480,7 @@ void Dblqh::copyTupkeyConfLab(Signal* signal) // records to ensure that node recovery does not fail because of simultaneous // scanning. /*---------------------------------------------------------------------------*/ - UintR TnoOfWords = readLength + tcConnectptr.p->primKeyLen; + UintR TnoOfWords = readLength + len; TnoOfWords = TnoOfWords + MAGIC_CONSTANT; TnoOfWords = TnoOfWords + (TnoOfWords >> 2); @@ -9483,7 +9561,7 @@ void Dblqh::copyCompletedLab(Signal* signal) // Make sure that something is in progress. Otherwise we will simply stop // and nothing more will happen. 
/*---------------------------------------------------------------------------*/ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if return; @@ -9502,7 +9580,7 @@ void Dblqh::nextRecordCopy(Signal* signal) // scans on the same record and this will certainly lead to unexpected // behaviour. /*---------------------------------------------------------------------------*/ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if scanptr.p->scanState = ScanRecord::WAIT_NEXT_SCAN_COPY; @@ -9529,7 +9607,7 @@ void Dblqh::nextRecordCopy(Signal* signal) jam(); default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -9603,7 +9681,7 @@ void Dblqh::closeCopyLab(Signal* signal) jam(); default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -9724,7 +9802,6 @@ void Dblqh::closeCopyRequestLab(Signal* signal) scanptr.p->scanErrorCounter++; switch (scanptr.p->scanState) { case ScanRecord::WAIT_TUPKEY_COPY: - case ScanRecord::WAIT_COPY_KEYINFO: case ScanRecord::WAIT_NEXT_SCAN_COPY: jam(); /*---------------------------------------------------------------------------*/ @@ -9955,11 +10032,6 @@ void Dblqh::execCOPY_STATEREQ(Signal* signal) void Dblqh::initCopyTc(Signal* signal) { const NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0]; - tcConnectptr.p->primKeyLen = nextScanConf->keyLength; - tcConnectptr.p->tupkeyData[0] = nextScanConf->key[0]; - tcConnectptr.p->tupkeyData[1] = nextScanConf->key[1]; - tcConnectptr.p->tupkeyData[2] = nextScanConf->key[2]; - tcConnectptr.p->tupkeyData[3] = nextScanConf->key[3]; scanptr.p->scanLocalref[0] = nextScanConf->localKey[0]; scanptr.p->scanLocalref[1] = nextScanConf->localKey[1]; scanptr.p->scanLocalFragid = nextScanConf->fragId; @@ -9968,7 +10040,6 @@ void Dblqh::initCopyTc(Signal* signal) tcConnectptr.p->opExec = 0; /* NOT INTERPRETED MODE */ tcConnectptr.p->schemaVersion = 
scanptr.p->scanSchemaVersion; Uint32 reqinfo = 0; - LqhKeyReq::setKeyLen(reqinfo, nextScanConf->keyLength); LqhKeyReq::setLockType(reqinfo, ZINSERT); LqhKeyReq::setDirtyFlag(reqinfo, 1); LqhKeyReq::setSimpleFlag(reqinfo, 1); @@ -10631,7 +10702,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -11455,7 +11526,7 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal) const GCPSaveReq * const saveReq = (GCPSaveReq *)&signal->theData[0]; if (ERROR_INSERTED(5000)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); } if (ERROR_INSERTED(5007)){ @@ -11821,7 +11892,7 @@ void Dblqh::execFSCLOSECONF(Signal* signal) return; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch }//Dblqh::execFSCLOSECONF() @@ -11896,7 +11967,7 @@ void Dblqh::execFSOPENCONF(Signal* signal) return; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch }//Dblqh::execFSOPENCONF() @@ -11955,7 +12026,7 @@ void Dblqh::execFSREADCONF(Signal* signal) return; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch }//Dblqh::execFSREADCONF() @@ -12057,7 +12128,7 @@ void Dblqh::execFSWRITECONF(Signal* signal) return; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch }//Dblqh::execFSWRITECONF() @@ -12101,7 +12172,7 @@ void Dblqh::execFSWRITEREF(Signal* signal) break; case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); default: jam(); break; @@ -12315,7 +12386,7 @@ void Dblqh::writeLogfileLab(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -13485,7 +13556,6 @@ void Dblqh::execSR_FRAGIDCONF(Signal* signal) ptrCheckGuard(fragptr, cfragrecFileSize, fragrecord); 
fragptr.p->accFragptr[0] = srFragidConf->fragPtr[0]; fragptr.p->accFragptr[1] = srFragidConf->fragPtr[1]; - fragptr.p->hashCheckBit = srFragidConf->hashCheckBit; Uint32 noLocFrag = srFragidConf->noLocFrag; ndbrequire(noLocFrag == 2); Uint32 fragid[2]; @@ -13548,7 +13618,7 @@ void Dblqh::execACC_SRCONF(Signal* signal) ptrCheckGuard(lcpLocptr, clcpLocrecFileSize, lcpLocRecord); if (lcpLocptr.p->lcpLocstate != LcpLocRecord::SR_ACC_STARTED) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if @@ -13570,7 +13640,7 @@ void Dblqh::execACC_SRREF(Signal* signal) { jamEntry(); terrorCode = signal->theData[1]; - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Dblqh::execACC_SRREF() @@ -13710,7 +13780,7 @@ void Dblqh::execTUP_SRREF(Signal* signal) { jamEntry(); terrorCode = signal->theData[1]; - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Dblqh::execTUP_SRREF() @@ -14010,7 +14080,7 @@ void Dblqh::execEXEC_FRAGREF(Signal* signal) { jamEntry(); terrorCode = signal->theData[1]; - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Dblqh::execEXEC_FRAGREF() @@ -14102,7 +14172,7 @@ void Dblqh::execSrCompletedLab(Signal* signal) * PROBLEM. THIS SHOULD NOT OCCUR. IF IT OCCURS ANYWAY THEN WE * HAVE TO FIND A CURE FOR THIS PROBLEM. 
* ----------------------------------------------------------------- */ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if signal->theData[0] = ZSR_LOG_LIMITS; @@ -14629,7 +14699,7 @@ void Dblqh::execSr(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -14912,7 +14982,7 @@ void Dblqh::execDEBUG_SIG(Signal* signal) signal->theData[2], signal->theData[3], signal->theData[4], signal->theData[5], signal->theData[6], signal->theData[7]); - progError(__LINE__, ERR_SR_REDOLOG, buf); + progError(__LINE__, NDBD_EXIT_SR_REDOLOG, buf); return; }//Dblqh::execDEBUG_SIG() @@ -14984,12 +15054,12 @@ void Dblqh::invalidateLogAfterLastGCI(Signal* signal) { jam(); if (logPartPtr.p->logExecState != LogPartRecord::LES_EXEC_LOG_INVALIDATE) { jam(); - systemError(signal); + systemError(signal, __LINE__); } if (logFilePtr.p->fileNo != logPartPtr.p->invalidateFileNo) { jam(); - systemError(signal); + systemError(signal, __LINE__); } switch (lfoPtr.p->lfoState) { @@ -15044,7 +15114,7 @@ void Dblqh::invalidateLogAfterLastGCI(Signal* signal) { default: jam(); - systemError(signal); + systemError(signal, __LINE__); return; break; } @@ -15192,7 +15262,7 @@ void Dblqh::execLogComp(Signal* signal) if (logPartPtr.p->logPartState != LogPartRecord::SR_THIRD_PHASE_COMPLETED) { if (logPartPtr.p->logPartState != LogPartRecord::SR_THIRD_PHASE_STARTED) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; } else { jam(); @@ -15450,7 +15520,7 @@ void Dblqh::openSrFourthZeroSkipInitLab(Signal* signal) * THE HEADER PAGE IN THE LOG IS PAGE ZERO IN FILE ZERO. * THIS SHOULD NEVER OCCUR. 
* ------------------------------------------------------------------- */ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if }//if @@ -15508,7 +15578,7 @@ void Dblqh::srFourthComp(Signal* signal) if (logPartPtr.p->logPartState != LogPartRecord::SR_FOURTH_PHASE_COMPLETED) { if (logPartPtr.p->logPartState != LogPartRecord::SR_FOURTH_PHASE_STARTED) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; } else { jam(); @@ -15570,11 +15640,6 @@ void Dblqh::srFourthComp(Signal* signal) /* ####### ERROR MODULE ####### */ /* */ /* ######################################################################### */ -void Dblqh::warningHandlerLab(Signal* signal) -{ - systemErrorLab(signal); - return; -}//Dblqh::warningHandlerLab() /*---------------------------------------------------------------------------*/ /* AN ERROR OCCURRED THAT WE WILL NOT TREAT AS SYSTEM ERROR. MOST OFTEN THIS */ @@ -15595,9 +15660,10 @@ void Dblqh::warningHandlerLab(Signal* signal) /* THE COMMIT, COMPLETE OR ABORT PHASE, WE PERFORM A CRASH OF THE AXE VM*/ /*---------------------------------------------------------------------------*/ -void Dblqh::systemErrorLab(Signal* signal) +void Dblqh::systemErrorLab(Signal* signal, int line) { - progError(0, 0); + systemError(signal, line); + progError(line, NDBD_EXIT_NDBREQUIRE); /*************************************************************************>*/ /* WE WANT TO INVOKE AN IMMEDIATE ERROR HERE SO WE GET THAT BY */ /* INSERTING A CERTAIN POINTER OUT OF RANGE. */ @@ -15806,7 +15872,7 @@ void Dblqh::buildLinkedLogPageList(Signal* signal) // Uint32 checkSum = bllLogPagePtr.p->logPageWord[ZPOS_CHECKSUM]; // if (checkSum != calcCheckSum) { // ndbout << "Redolog: Checksum failure." 
<< endl; -// progError(__LINE__, ERR_NDBREQUIRE, "Redolog: Checksum failure."); +// progError(__LINE__, NDBD_EXIT_NDBREQUIRE, "Redolog: Checksum failure."); // } // #endif @@ -15932,7 +15998,7 @@ CSC_ACC_DOWHILE: jam(); if (cscLcpLocptr.p->lcpLocstate != LcpLocRecord::SR_ACC_STARTED) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if return; @@ -15949,7 +16015,7 @@ CSC_TUP_DOWHILE: jam(); if (cscLcpLocptr.p->lcpLocstate != LcpLocRecord::SR_TUP_STARTED) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if return; @@ -16160,7 +16226,7 @@ error: "Unable to restart, failed while reading redo." " Likely invalid change of configuration"); progError(__LINE__, - ERR_INVALID_CONFIG, + NDBD_EXIT_INVALID_CONFIG, buf); }//Dblqh::findLogfile() @@ -17531,7 +17597,7 @@ void Dblqh::releaseAccList(Signal* signal) racTcNextConnectptr.i = tcConnectptr.p->nextTc; if (tcConnectptr.p->listState != TcConnectionrec::ACC_BLOCK_LIST) { jam(); - systemError(signal); + systemError(signal, __LINE__); }//if tcConnectptr.p->listState = TcConnectionrec::NOT_IN_LIST; if (racTcNextConnectptr.i != RNIL) { @@ -17710,7 +17776,7 @@ void Dblqh::releaseWaitQueue(Signal* signal) rwaTcNextConnectptr.i = tcConnectptr.p->nextTc; if (tcConnectptr.p->listState != TcConnectionrec::WAIT_QUEUE_LIST) { jam(); - systemError(signal); + systemError(signal, __LINE__); }//if tcConnectptr.p->listState = TcConnectionrec::NOT_IN_LIST; if (rwaTcNextConnectptr.i != RNIL) { @@ -17973,7 +18039,8 @@ void Dblqh::stepAhead(Signal* signal, Uint32 stepAheadWords) logFilePtr.p->currentLogpage = logPagePtr.p->logPageWord[ZNEXT_PAGE]; logPagePtr.i = logPagePtr.p->logPageWord[ZNEXT_PAGE]; logFilePtr.p->currentFilepage++; - ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord); + ptrCheckGuardErr(logPagePtr, clogPageFileSize, logPageRecord, + NDBD_EXIT_SR_REDOLOG); logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_HEADER_SIZE; logPartPtr.p->execSrPagesRead--; 
logPartPtr.p->execSrPagesExecuted++; @@ -18274,7 +18341,7 @@ void Dblqh::writeNextLog(Signal* signal) /* CAN INVOKE THIS SYSTEM CRASH. HOWEVER ONLY */ /* VERY SERIOUS TIMING PROBLEMS. */ /* -------------------------------------------------- */ - systemError(signal); + systemError(signal, __LINE__); }//if }//if if (logFilePtr.p->currentMbyte == (ZNO_MBYTES_IN_FILE - 1)) { @@ -18485,6 +18552,69 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) return; } + Uint32 arg= dumpState->args[0]; + if(arg == 2304 || arg == 2305) + { + jam(); + Uint32 i; + GcpRecordPtr gcp; gcp.i = RNIL; + for(i = 0; i<4; i++) + { + logPartPtr.i = i; + ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord); + ndbout_c("LP %d state: %d WW_Gci: %d gcprec: %d flq: %d currfile: %d tailFileNo: %d logTailMbyte: %d", + i, + logPartPtr.p->logPartState, + logPartPtr.p->waitWriteGciLog, + logPartPtr.p->gcprec, + logPartPtr.p->firstLogQueue, + logPartPtr.p->currentLogfile, + logPartPtr.p->logTailFileNo, + logPartPtr.p->logTailMbyte); + + if(gcp.i == RNIL && logPartPtr.p->gcprec != RNIL) + gcp.i = logPartPtr.p->gcprec; + + LogFileRecordPtr logFilePtr; + Uint32 first= logFilePtr.i= logPartPtr.p->firstLogfile; + do + { + ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); + ndbout_c(" file %d(%d) FileChangeState: %d logFileStatus: %d currentMbyte: %d currentFilepage", + logFilePtr.p->fileNo, + logFilePtr.i, + logFilePtr.p->fileChangeState, + logFilePtr.p->logFileStatus, + logFilePtr.p->currentMbyte, + logFilePtr.p->currentFilepage); + logFilePtr.i = logFilePtr.p->nextLogFile; + } while(logFilePtr.i != first); + } + + if(gcp.i != RNIL) + { + ptrCheckGuard(gcp, cgcprecFileSize, gcpRecord); + for(i = 0; i<4; i++) + { + ndbout_c(" GCP %d file: %d state: %d sync: %d page: %d word: %d", + i, gcp.p->gcpFilePtr[i], gcp.p->gcpLogPartState[i], + gcp.p->gcpSyncReady[i], + gcp.p->gcpPageNo[i], + gcp.p->gcpWordNo[i]); + } + } + + if(arg== 2305) + { + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, + "Please report 
this as a bug. " + "Provide as much info as possible, expecially all the " + "ndb_*_out.log files, Thanks. " + "Shutting down node due to failed handling of GCP_SAVEREQ"); + + } + } + if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2) { c_error_insert_table_id = dumpState->args[1]; @@ -18493,7 +18623,6 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) TcConnectionrec *regTcConnectionrec = tcConnectionrec; Uint32 ttcConnectrecFileSize = ctcConnectrecFileSize; - Uint32 arg = dumpState->args[0]; if(arg == 2306) { for(Uint32 i = 0; i<1024; i++) diff --git a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp index 4488e7500f5..bf6ce7129ba 100644 --- a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp +++ b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp @@ -141,6 +141,8 @@ #define ZALREADYEXIST 630 #define ZINCONSISTENTHASHINDEX 892 #define ZNOTUNIQUE 893 + +#define ZINVALID_KEY 290 #endif class Dbtc: public SimulatedBlock { @@ -553,7 +555,7 @@ public: Uint32 expectedTransIdAI; AttributeBuffer transIdAI; // For accumulating TransId_AI - TcIndxReq tcIndxReq; + TcKeyReq tcIndxReq; UintR connectionIndex; UintR indexReadTcConnect; // @@ -864,11 +866,11 @@ public: UintR hashValue; /* THE HASH VALUE USED TO LOCATE FRAGMENT */ Uint8 distributionKeyIndicator; - Uint8 distributionGroupIndicator; - Uint8 distributionGroupType; + Uint8 m_special_hash; // collation or distribution key + Uint8 unused2; Uint8 lenAiInTckeyreq; /* LENGTH OF ATTRIBUTE INFORMATION IN TCKEYREQ */ - Uint8 distributionKey; + Uint8 fragmentDistributionKey; /* DIH generation no */ /** * EXECUTION MODE OF OPERATION @@ -892,16 +894,16 @@ public: // Second 16 byte cache line in second 64 byte cache // line. Diverse use. 
//--------------------------------------------------- - UintR distributionGroup; + UintR distributionKey; UintR nextCacheRec; - UintR distributionKeySize; + UintR unused3; Uint32 scanInfo; //--------------------------------------------------- // Third 16 byte cache line in second 64 // byte cache line. Diverse use. //--------------------------------------------------- - Uint32 scanNode; + Uint32 unused4; Uint32 scanTakeOverInd; UintR firstKeybuf; /* POINTER THE LINKED LIST OF KEY BUFFERS */ UintR lastKeybuf; /* VARIABLE POINTING TO THE LAST KEY BUFFER */ @@ -949,6 +951,7 @@ public: typedef Ptr<HostRecord> HostRecordPtr; /* *********** TABLE RECORD ********************************************* */ + /********************************************************/ /* THIS RECORD CONTAINS THE CURRENT SCHEMA VERSION OF */ /* ALL TABLES IN THE SYSTEM. */ @@ -959,14 +962,18 @@ public: Uint8 dropping; Uint8 tableType; Uint8 storedTable; + + Uint8 noOfKeyAttr; + Uint8 hasCharAttr; + Uint8 noOfDistrKeys; bool checkTable(Uint32 schemaVersion) const { return enabled && !dropping && (table_version_major(schemaVersion) == table_version_major(currentSchemaVersion)); } - + Uint32 getErrorCode(Uint32 schemaVersion) const; - + struct DropTable { Uint32 senderRef; Uint32 senderData; @@ -1145,7 +1152,7 @@ public: Uint32 nextScan; // Length of expected attribute information - Uint32 scanAiLength; + union { Uint32 scanAiLength; Uint32 m_booked_fragments_count; }; Uint32 scanKeyLen; @@ -1414,6 +1421,10 @@ private: void gcpTcfinished(Signal* signal); void handleGcp(Signal* signal); void hash(Signal* signal); + bool handle_special_hash(Uint32 dstHash[4], + Uint32* src, Uint32 srcLen, + Uint32 tabPtrI, bool distr); + void initApiConnect(Signal* signal); void initApiConnectRec(Signal* signal, ApiConnectRecord * const regApiPtr, @@ -1459,7 +1470,7 @@ private: void sendContinueTimeOutControl(Signal* signal, Uint32 TapiConPtr); void sendKeyinfo(Signal* signal, BlockReference TBRef, Uint32 len); void 
sendlqhkeyreq(Signal* signal, BlockReference TBRef); - void sendSystemError(Signal* signal); + void sendSystemError(Signal* signal, int line); void sendtckeyconf(Signal* signal, UintR TcommitFlag); void sendTcIndxConf(Signal* signal, UintR TcommitFlag); void unlinkApiConnect(Signal* signal); @@ -1535,8 +1546,8 @@ private: bool holdOperation = false); void releaseFiredTriggerData(DLFifoList<TcFiredTriggerData>* triggers); // Generated statement blocks - void warningHandlerLab(Signal* signal); - void systemErrorLab(Signal* signal); + void warningHandlerLab(Signal* signal, int line); + void systemErrorLab(Signal* signal, int line); void sendSignalErrorRefuseLab(Signal* signal); void scanTabRefLab(Signal* signal, Uint32 errCode); void diFcountReqLab(Signal* signal, ScanRecordPtr); @@ -1671,7 +1682,7 @@ private: c_scan_count = c_range_scan_count = 0; } Uint32 report(Signal* signal){ - signal->theData[0] = EventReport::TransReportCounters; + signal->theData[0] = NDB_LE_TransReportCounters; signal->theData[1] = ctransCount; signal->theData[2] = ccommitCount; signal->theData[3] = creadCount; diff --git a/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp b/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp index 2b1f079ea17..7bd308119fc 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp @@ -291,8 +291,19 @@ Dbtc::Dbtc(const class Configuration & conf): addRecSignal(GSN_ALTER_TAB_REQ, &Dbtc::execALTER_TAB_REQ); - initData(); - + cacheRecord = 0; + apiConnectRecord = 0; + tcConnectRecord = 0; + hostRecord = 0; + tableRecord = 0; + scanRecord = 0; + databufRecord = 0; + attrbufRecord = 0; + gcpRecord = 0; + tcFailRecord = 0; + c_apiConTimer = 0; + c_apiConTimer_line = 0; + #ifdef VM_TRACE { void* tmp[] = { &apiConnectptr, diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index ab0981a98ef..9b0818e6e1f 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ 
-20,6 +20,7 @@ #include "md5_hash.hpp" #include <RefConvert.hpp> #include <ndb_limits.h> +#include <my_sys.h> #include <signaldata/EventReport.hpp> #include <signaldata/TcKeyReq.hpp> @@ -63,6 +64,9 @@ #include <signaldata/PackedSignal.hpp> #include <AttributeHeader.hpp> #include <signaldata/DictTabInfo.hpp> +#include <AttributeDescriptor.hpp> +#include <SectionReader.hpp> +#include <KeyDescriptor.hpp> #include <NdbOut.hpp> #include <DebuggerNames.hpp> @@ -317,6 +321,10 @@ void Dbtc::execREAD_NODESREF(Signal* signal) void Dbtc::execTC_SCHVERREQ(Signal* signal) { jamEntry(); + if (! assembleFragments(signal)) { + jam(); + return; + } tabptr.i = signal->theData[0]; ptrCheckGuard(tabptr, ctabrecFilesize, tableRecord); tabptr.p->currentSchemaVersion = signal->theData[1]; @@ -324,10 +332,18 @@ void Dbtc::execTC_SCHVERREQ(Signal* signal) BlockReference retRef = signal->theData[3]; tabptr.p->tableType = (Uint8)signal->theData[4]; BlockReference retPtr = signal->theData[5]; + Uint32 noOfKeyAttr = signal->theData[6]; + ndbrequire(noOfKeyAttr <= MAX_ATTRIBUTES_IN_INDEX); + + const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tabptr.i); + ndbrequire(noOfKeyAttr == desc->noOfKeyAttr); ndbrequire(tabptr.p->enabled == false); tabptr.p->enabled = true; tabptr.p->dropping = false; + tabptr.p->noOfKeyAttr = desc->noOfKeyAttr; + tabptr.p->hasCharAttr = desc->hasCharAttr; + tabptr.p->noOfDistrKeys = desc->noOfDistrKeys; signal->theData[0] = tabptr.i; signal->theData[1] = retPtr; @@ -597,6 +613,8 @@ void Dbtc::execREAD_CONFIG_REQ(Signal* signal) theConfiguration.getOwnConfigIterator(); ndbrequire(p != 0); + initData(); + UintR apiConnect; UintR tcConnect; UintR tables; @@ -1017,7 +1035,7 @@ Dbtc::handleFailedApiNode(Signal* signal, /*********************************************************************/ // Not implemented yet. 
/*********************************************************************/ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; case CS_RESTART: jam(); @@ -1041,7 +1059,7 @@ Dbtc::handleFailedApiNode(Signal* signal, /*********************************************************************/ default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; }//switch } else { @@ -1380,7 +1398,7 @@ void Dbtc::printState(Signal* signal, int place) << " keylen = " << regCachePtr->keylen << endl; } else { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if }//if #endif @@ -1434,7 +1452,7 @@ Dbtc::TCKEY_abort(Signal* signal, int place) return; case 6: jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; case 7: @@ -1455,7 +1473,7 @@ Dbtc::TCKEY_abort(Signal* signal, int place) case 10: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 11: @@ -1486,7 +1504,7 @@ Dbtc::TCKEY_abort(Signal* signal, int place) /* PARTICULAR TC CONNECT RECORD. THIS MUST BE CAUSED BY NDB */ /* INTERNAL ERROR. 
*/ /********************************************************************/ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch return; @@ -1499,17 +1517,17 @@ Dbtc::TCKEY_abort(Signal* signal, int place) case 16: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 17: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 18: jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; case 19: @@ -1518,22 +1536,22 @@ Dbtc::TCKEY_abort(Signal* signal, int place) case 20: jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; case 21: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 22: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 23: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 24: @@ -1543,7 +1561,7 @@ Dbtc::TCKEY_abort(Signal* signal, int place) case 25: jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; case 26: @@ -1551,7 +1569,7 @@ Dbtc::TCKEY_abort(Signal* signal, int place) return; case 27: - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); jam(); return; @@ -1562,92 +1580,92 @@ Dbtc::TCKEY_abort(Signal* signal, int place) case 29: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 30: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 31: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 32: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 33: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 34: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 35: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 36: jam(); - systemErrorLab(signal); + systemErrorLab(signal, 
__LINE__); return; case 37: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 38: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 39: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 40: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 41: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 42: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 43: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 44: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 45: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 46: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 47: @@ -1669,7 +1687,7 @@ Dbtc::TCKEY_abort(Signal* signal, int place) case 50: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; case 51: @@ -1745,7 +1763,7 @@ Dbtc::TCKEY_abort(Signal* signal, int place) default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch } @@ -1802,7 +1820,7 @@ void Dbtc::execKEYINFO(Signal* signal) return; default: jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//switch @@ -2225,11 +2243,10 @@ void Dbtc::hash(Signal* signal) UintR Tdata2; UintR Tdata3; UintR* Tdata32; - Uint64 Tdata[512]; - + CacheRecord * const regCachePtr = cachePtr.p; - Tdata32 = (UintR*)&Tdata[0]; - + Tdata32 = signal->theData; + Tdata0 = regCachePtr->keydata[0]; Tdata1 = regCachePtr->keydata[1]; Tdata2 = regCachePtr->keydata[2]; @@ -2255,31 +2272,78 @@ void Dbtc::hash(Signal* signal) ti += 4; }//while }//if - UintR ThashValue; - UintR TdistrHashValue; - ThashValue = md5_hash((Uint64*)&Tdata32[0], (UintR)regCachePtr->keylen); - if (regCachePtr->distributionGroupIndicator == 1) { - if 
(regCachePtr->distributionGroupType == 1) { - jam(); - TdistrHashValue = (regCachePtr->distributionGroup << 6); - } else { - jam(); - Tdata32[0] = regCachePtr->distributionGroup; - TdistrHashValue = md5_hash((Uint64*)&Tdata32[0], (UintR)1); - }//if - } else if (regCachePtr->distributionKeyIndicator == 1) { + UintR keylen = (UintR)regCachePtr->keylen; + Uint32 distKey = regCachePtr->distributionKeyIndicator; + + Uint32 tmp[4]; + if(!regCachePtr->m_special_hash) + { + md5_hash(tmp, (Uint64*)&Tdata32[0], keylen); + } + else + { + handle_special_hash(tmp, Tdata32, keylen, regCachePtr->tableref, !distKey); + } + + thashValue = tmp[0]; + if (distKey){ jam(); - TdistrHashValue = md5_hash((Uint64*)&Tdata32[0], - (UintR)regCachePtr->distributionKeySize); + tdistrHashValue = regCachePtr->distributionKey; } else { jam(); - TdistrHashValue = ThashValue; + tdistrHashValue = tmp[1]; }//if - thashValue = ThashValue; - tdistrHashValue = TdistrHashValue; }//Dbtc::hash() +bool +Dbtc::handle_special_hash(Uint32 dstHash[4], Uint32* src, Uint32 srcLen, + Uint32 tabPtrI, + bool distr) +{ + Uint64 Tmp[MAX_KEY_SIZE_IN_WORDS * MAX_XFRM_MULTIPLY]; + const TableRecord* tabPtrP = &tableRecord[tabPtrI]; + const bool hasCharAttr = tabPtrP->hasCharAttr; + const bool hasDistKeys = tabPtrP->noOfDistrKeys > 0; + + Uint32 *dst = (Uint32*)Tmp; + Uint32 dstPos = 0; + Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX]; + Uint32 * keyPartLenPtr; + if(hasCharAttr) + { + keyPartLenPtr = keyPartLen; + dstPos = xfrm_key(tabPtrI, src, dst, sizeof(Tmp) >> 2, keyPartLenPtr); + if (unlikely(dstPos == 0)) + { + goto error; + } + } + else + { + dst = src; + dstPos = srcLen; + keyPartLenPtr = 0; + } + + md5_hash(dstHash, (Uint64*)dst, dstPos); + + if(distr && hasDistKeys) + { + jam(); + + Uint32 tmp[4]; + Uint32 len = create_distr_key(tabPtrI, dst, keyPartLenPtr); + md5_hash(tmp, (Uint64*)dst, len); + dstHash[1] = tmp[1]; + } + return true; // success + +error: + terrorCode = ZINVALID_KEY; + return false; +} + /* 
INIT_API_CONNECT_REC --------------------------- @@ -2672,18 +2736,13 @@ void Dbtc::execTCKEYREQ(Signal* signal) Uint8 TSimpleFlag = tcKeyReq->getSimpleFlag(Treqinfo); Uint8 TDirtyFlag = tcKeyReq->getDirtyFlag(Treqinfo); Uint8 TInterpretedFlag = tcKeyReq->getInterpretedFlag(Treqinfo); - Uint8 TDistrGroupFlag = tcKeyReq->getDistributionGroupFlag(Treqinfo); - Uint8 TDistrGroupTypeFlag = tcKeyReq->getDistributionGroupTypeFlag(Treqinfo); Uint8 TDistrKeyFlag = tcKeyReq->getDistributionKeyFlag(Treqinfo); Uint8 TexecuteFlag = TexecFlag; regCachePtr->opSimple = TSimpleFlag; regCachePtr->opExec = TInterpretedFlag; regTcPtr->dirtyOp = TDirtyFlag; - - regCachePtr->distributionGroupIndicator = TDistrGroupFlag; - regCachePtr->distributionGroupType = TDistrGroupTypeFlag; - regCachePtr->distributionKeyIndicator = TDistrKeyFlag; + regCachePtr->distributionKeyIndicator = TDistrKeyFlag; //------------------------------------------------------------- // The next step is to read the upto three conditional words. 
@@ -2692,17 +2751,14 @@ void Dbtc::execTCKEYREQ(Signal* signal) Uint32* TOptionalDataPtr = (Uint32*)&tcKeyReq->scanInfo; { Uint32 TDistrGHIndex = tcKeyReq->getScanIndFlag(Treqinfo); - Uint32 TDistrKeyIndex = TDistrGHIndex + TDistrGroupFlag; + Uint32 TDistrKeyIndex = TDistrGHIndex; - Uint32 TscanNode = tcKeyReq->getTakeOverScanNode(TOptionalDataPtr[0]); Uint32 TscanInfo = tcKeyReq->getTakeOverScanInfo(TOptionalDataPtr[0]); regCachePtr->scanTakeOverInd = TDistrGHIndex; - regCachePtr->scanNode = TscanNode; regCachePtr->scanInfo = TscanInfo; - regCachePtr->distributionGroup = TOptionalDataPtr[TDistrGHIndex]; - regCachePtr->distributionKeySize = TOptionalDataPtr[TDistrKeyIndex]; + regCachePtr->distributionKey = TOptionalDataPtr[TDistrKeyIndex]; TkeyIndex = TDistrKeyIndex + TDistrKeyFlag; } @@ -2736,7 +2792,8 @@ void Dbtc::execTCKEYREQ(Signal* signal) regCachePtr->keylen = TkeyLength; regCachePtr->lenAiInTckeyreq = titcLenAiInTckeyreq; regCachePtr->currReclenAi = titcLenAiInTckeyreq; - + regCachePtr->m_special_hash = + localTabptr.p->hasCharAttr | (localTabptr.p->noOfDistrKeys > 0); Tdata1 = TAIDataPtr[0]; Tdata2 = TAIDataPtr[1]; Tdata3 = TAIDataPtr[2]; @@ -2893,7 +2950,15 @@ void Dbtc::tckeyreq050Lab(Signal* signal) UintR tnoOfStandby; UintR tnodeinfo; + terrorCode = 0; + hash(signal); /* NOW IT IS TIME TO CALCULATE THE HASH VALUE*/ + + if (unlikely(terrorCode)) + { + releaseAtErrorLab(signal); + return; + } CacheRecord * const regCachePtr = cachePtr.p; TcConnectRecord * const regTcPtr = tcConnectptr.p; @@ -2943,6 +3008,15 @@ void Dbtc::tckeyreq050Lab(Signal* signal) execDIGETNODESREF(signal); return; } + + if(ERROR_INSERTED(8050) && signal->theData[3] != getOwnNodeId()) + { + ndbassert(false); + signal->theData[1] = 626; + execDIGETNODESREF(signal); + return; + } + /****************>>*/ /* DIGETNODESCONF >*/ /* ***************>*/ @@ -2967,7 +3041,7 @@ void Dbtc::tckeyreq050Lab(Signal* signal) tnoOfBackup = tnodeinfo & 3; tnoOfStandby = (tnodeinfo >> 8) & 3; - 
regCachePtr->distributionKey = (tnodeinfo >> 16) & 255; + regCachePtr->fragmentDistributionKey = (tnodeinfo >> 16) & 255; if (Toperation == ZREAD) { if (Tdirty == 1) { jam(); @@ -2977,7 +3051,7 @@ void Dbtc::tckeyreq050Lab(Signal* signal) /* NODE IF POSSIBLE TO AVOID UNNECESSARY COMMUNICATION */ /* WITH SIMPLE READS. */ /*-------------------------------------------------------------*/ - arrGuard(tnoOfBackup, 4); + arrGuard(tnoOfBackup, MAX_REPLICAS); UintR Tindex; UintR TownNode = cownNodeid; for (Tindex = 1; Tindex <= tnoOfBackup; Tindex++) { @@ -3032,7 +3106,7 @@ void Dbtc::tckeyreq050Lab(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch attrinfoDihReceivedLab(signal); @@ -3057,8 +3131,6 @@ void Dbtc::attrinfoDihReceivedLab(Signal* signal) CacheRecord * const regCachePtr = cachePtr.p; TcConnectRecord * const regTcPtr = tcConnectptr.p; Uint16 Tnode = regTcPtr->tcNodedata[0]; - Uint16 TscanTakeOverInd = regCachePtr->scanTakeOverInd; - Uint16 TscanNode = regCachePtr->scanNode; TableRecordPtr localTabptr; localTabptr.i = regCachePtr->tableref; @@ -3071,11 +3143,6 @@ void Dbtc::attrinfoDihReceivedLab(Signal* signal) TCKEY_abort(signal, 58); return; } - if ((TscanTakeOverInd == 1) && - (Tnode != TscanNode)) { - TCKEY_abort(signal, 15); - return; - }//if arrGuard(Tnode, MAX_NDB_NODES); packLqhkeyreq(signal, calcLqhBlockRef(Tnode)); }//Dbtc::attrinfoDihReceivedLab() @@ -3107,7 +3174,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal, UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6; #ifdef ERROR_INSERT if (ERROR_INSERTED(8002)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if if (ERROR_INSERTED(8007)) { if (apiConnectptr.p->apiConnectstate == CS_STARTED) { @@ -3138,7 +3205,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal, /* ---------------------------------------------------------------------- */ // Bit16 == 0 since StoredProcedures are not yet supported. 
/* ---------------------------------------------------------------------- */ - LqhKeyReq::setDistributionKey(tslrAttrLen, regCachePtr->distributionKey); + LqhKeyReq::setDistributionKey(tslrAttrLen, regCachePtr->fragmentDistributionKey); LqhKeyReq::setScanTakeOverFlag(tslrAttrLen, regCachePtr->scanTakeOverInd); Tdata10 = 0; @@ -3374,7 +3441,7 @@ void Dbtc::releaseAttrinfo() regApiPtr->cachePtr = RNIL; return; }//if - systemErrorLab(0); + systemErrorLab(0, __LINE__); return; }//Dbtc::releaseAttrinfo() @@ -3487,7 +3554,7 @@ void Dbtc::execPACKED_SIGNAL(Signal* signal) Tlength = signal->length(); if (Tlength > 25) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if Uint32* TpackDataPtr; @@ -3542,7 +3609,7 @@ void Dbtc::execPACKED_SIGNAL(Signal* signal) Tstep += LqhKeyConf::SignalLength; break; default: - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch }//while @@ -3620,7 +3687,7 @@ void Dbtc::execLQHKEYCONF(Signal* signal) #ifdef ERROR_INSERT if (ERROR_INSERTED(8029)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if if (ERROR_INSERTED(8003)) { if (regApiPtr->apiConnectstate == CS_STARTED) { @@ -4105,7 +4172,7 @@ void Dbtc::diverify010Lab(Signal* signal) signal->theData[0] = apiConnectptr.i; if (ERROR_INSERTED(8022)) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if if (TfirstfreeApiConnectCopy != RNIL) { seizeApiConnectCopy(signal); @@ -4453,7 +4520,7 @@ void Dbtc::execCOMMITTED(Signal* signal) return; }//if if (ERROR_INSERTED(8030)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if if (ERROR_INSERTED(8025)) { SET_ERROR_INSERT_VALUE(8026); @@ -4507,7 +4574,7 @@ void Dbtc::execCOMMITTED(Signal* signal) }//if if (ERROR_INSERTED(8020)) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if /*-------------------------------------------------------*/ /* THE ENTIRE TRANSACTION IS NOW COMMITED */ @@ -4554,8 +4621,9 @@ void 
Dbtc::sendApiCommit(Signal* signal) } commitConf->transId1 = regApiPtr->transid[0]; commitConf->transId2 = regApiPtr->transid[1]; - - sendSignal(regApiPtr->ndbapiBlockref, GSN_TC_COMMITCONF, signal, 3, JBB); + commitConf->gci = regApiPtr->globalcheckpointid; + sendSignal(regApiPtr->ndbapiBlockref, GSN_TC_COMMITCONF, signal, + TcCommitConf::SignalLength, JBB); } else if (regApiPtr->returnsignal == RS_NO_RETURN) { jam(); } else { @@ -4747,7 +4815,7 @@ Dbtc::execTC_COMMIT_ACK(Signal* signal){ m_commitAckMarkerHash.release(removedMarker, key); if (removedMarker.i == RNIL) { jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//if sendRemoveMarkers(signal, removedMarker.p); @@ -4809,7 +4877,7 @@ void Dbtc::execCOMPLETED(Signal* signal) #ifdef ERROR_INSERT if (ERROR_INSERTED(8031)) { - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if if (ERROR_INSERTED(8019)) { CLEAR_ERROR_INSERT_VALUE; @@ -4868,7 +4936,7 @@ void Dbtc::execCOMPLETED(Signal* signal) }//if if (ERROR_INSERTED(8021)) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if apiConnectptr = localApiConnectptr; releaseTransResources(signal); @@ -5252,8 +5320,9 @@ void Dbtc::execTC_COMMITREQ(Signal* signal) commitConf->apiConnectPtr = apiConnectPtr; commitConf->transId1 = transId1; commitConf->transId2 = transId2; - - sendSignal(apiBlockRef, GSN_TC_COMMITCONF, signal, 3, JBB); + commitConf->gci = 0; + sendSignal(apiBlockRef, GSN_TC_COMMITCONF, signal, + TcCommitConf::SignalLength, JBB); regApiPtr->returnsignal = RS_NO_RETURN; releaseAbortResources(signal); @@ -5308,7 +5377,7 @@ void Dbtc::execTC_COMMITREQ(Signal* signal) return; break; default: - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//switch TcCommitRef * const commitRef = (TcCommitRef*)&signal->theData[0]; @@ -5321,7 +5390,7 @@ void Dbtc::execTC_COMMITREQ(Signal* signal) return; } else /** apiConnectptr.i < capiConnectFilesize */ { jam(); - 
warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; } }//Dbtc::execTC_COMMITREQ() @@ -5417,12 +5486,12 @@ void Dbtc::execTCROLLBACKREQ(Signal* signal) TC_ROLL_warning: jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; TC_ROLL_system_error: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Dbtc::execTCROLLBACKREQ() @@ -5663,7 +5732,7 @@ void Dbtc::errorReport(Signal* signal, int place) jam(); break; }//switch - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Dbtc::errorReport() @@ -5720,7 +5789,7 @@ void Dbtc::execABORTED(Signal* signal) }//if if (ERROR_INSERTED(8024)) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if /** @@ -5893,12 +5962,12 @@ ABORT020: case OS_ABORT_SENT: jam(); DEBUG("ABORT_SENT state in abort015Lab(), not expected"); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; default: jam(); DEBUG("tcConnectstate = " << tcConnectptr.p->tcConnectstate); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch @@ -6014,7 +6083,7 @@ void Dbtc::checkStartTimeout(Signal* signal) ctimeOutMissedHeartbeats++; if (ctimeOutMissedHeartbeats > 100){ jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); } } ctimeOutCheckLastHeartbeat = ctimeOutCheckHeartbeat; @@ -6203,7 +6272,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) if (((ctcTimer - getApiConTimer(apiConnectptr.i)) > (10 * ctimeOutValue)) && ((ctcTimer - getApiConTimer(apiConnectptr.i)) > 500)) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if break; case CS_COMMIT_SENT: @@ -6253,7 +6322,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) jam(); tcConnectptr.i = apiConnectptr.p->currentTcConnect; ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); - arrGuard(apiConnectptr.p->currentReplicaNo, 4); + 
arrGuard(apiConnectptr.p->currentReplicaNo, MAX_REPLICAS); hostptr.i = tcConnectptr.p->tcNodedata[apiConnectptr.p->currentReplicaNo]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); if (hostptr.p->hostStatus == HS_ALIVE) { @@ -6279,7 +6348,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) jam(); tcConnectptr.i = apiConnectptr.p->currentTcConnect; ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); - arrGuard(apiConnectptr.p->currentReplicaNo, 4); + arrGuard(apiConnectptr.p->currentReplicaNo, MAX_REPLICAS); hostptr.i = tcConnectptr.p->tcNodedata[apiConnectptr.p->currentReplicaNo]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); if (hostptr.p->hostStatus == HS_ALIVE) { @@ -6305,7 +6374,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) jam(); tcConnectptr.i = apiConnectptr.p->currentTcConnect; ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); - arrGuard(apiConnectptr.p->currentReplicaNo, 4); + arrGuard(apiConnectptr.p->currentReplicaNo, MAX_REPLICAS); hostptr.i = tcConnectptr.p->tcNodedata[apiConnectptr.p->currentReplicaNo]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); if (hostptr.p->hostStatus == HS_ALIVE) { @@ -6351,7 +6420,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) /* AN IMPOSSIBLE STATE IS SET. CRASH THE SYSTEM. */ /*------------------------------------------------------------------*/ DEBUG("State = " << apiConnectptr.p->apiConnectstate); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch return; @@ -6480,7 +6549,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) // in time to the ABORT signal we will declare it as dead. 
/*------------------------------------------------------------------*/ UintR Ti = 0; - arrGuard(tcConnectptr.p->noOfNodes, 4); + arrGuard(tcConnectptr.p->noOfNodes, MAX_REPLICAS+1); for (Ti = 0; Ti < tcConnectptr.p->noOfNodes; Ti++) { jam(); if (tcConnectptr.p->tcNodedata[Ti] != 0) { @@ -6659,7 +6728,7 @@ void Dbtc::execSCAN_HBREP(Signal* signal) break; default: DEBUG("execSCAN_HBREP: scanFragState="<<scanFragptr.p->scanFragState); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; } @@ -6757,7 +6826,7 @@ void Dbtc::timeOutFoundFragLab(Signal* signal, UintR TscanConPtr) * version. In a release version we will simply set the time-out to zero. *-----------------------------------------------------------------------*/ #ifdef VM_TRACE - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); #endif scanFragptr.p->stopFragTimer(); break; @@ -6766,7 +6835,7 @@ void Dbtc::timeOutFoundFragLab(Signal* signal, UintR TscanConPtr) /*----------------------------------------------------------------------- * Non-existent state. Crash. 
*-----------------------------------------------------------------------*/ - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; }//switch @@ -7062,7 +7131,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal) return; } - checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER); }//Dbtc::execTAKE_OVERTCCONF() @@ -7290,7 +7358,7 @@ void Dbtc::completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd) arrGuard(TtakeOverInd, MAX_NDB_NODES); if (tcNodeFailptr.p->takeOverProcState[TtakeOverInd] != ZTAKE_OVER_ACTIVE) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if tcNodeFailptr.p->takeOverProcState[TtakeOverInd] = ZTAKE_OVER_IDLE; @@ -7411,7 +7479,7 @@ void Dbtc::completeTransAtTakeOverDoOne(Signal* signal, UintR TtakeOverInd) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch }//Dbtc::completeTransAtTakeOverDoOne() @@ -7488,7 +7556,7 @@ void Dbtc::execABORTCONF(Signal* signal) warningReport(signal, 18); return; }//if - arrGuard(apiConnectptr.p->currentReplicaNo, 4); + arrGuard(apiConnectptr.p->currentReplicaNo, MAX_REPLICAS); if (tcConnectptr.p->tcNodedata[apiConnectptr.p->currentReplicaNo] != tnodeid) { warningReport(signal, 19); @@ -7504,7 +7572,7 @@ void Dbtc::toAbortHandlingLab(Signal* signal) do { if (tcurrentReplicaNo != (Uint8)Z8NIL) { jam(); - arrGuard(tcurrentReplicaNo, 4); + arrGuard(tcurrentReplicaNo, MAX_REPLICAS); const LqhTransConf::OperationStatus stat = (LqhTransConf::OperationStatus) tcConnectptr.p->failData[tcurrentReplicaNo]; @@ -7537,7 +7605,7 @@ void Dbtc::toAbortHandlingLab(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//switch }//if @@ -7638,7 +7706,7 @@ void Dbtc::execCOMMITCONF(Signal* signal) warningReport(signal, 10); return; }//if - arrGuard(apiConnectptr.p->currentReplicaNo, 4); + arrGuard(apiConnectptr.p->currentReplicaNo, MAX_REPLICAS); if 
(tcConnectptr.p->tcNodedata[apiConnectptr.p->currentReplicaNo] != tnodeid) { warningReport(signal, 11); @@ -7646,7 +7714,7 @@ void Dbtc::execCOMMITCONF(Signal* signal) }//if if (ERROR_INSERTED(8026)) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if tcurrentReplicaNo = (Uint8)Z8NIL; tcConnectptr.p->tcConnectstate = OS_COMMITTED; @@ -7658,7 +7726,7 @@ void Dbtc::toCommitHandlingLab(Signal* signal) do { if (tcurrentReplicaNo != (Uint8)Z8NIL) { jam(); - arrGuard(tcurrentReplicaNo, 4); + arrGuard(tcurrentReplicaNo, MAX_REPLICAS); switch (tcConnectptr.p->failData[tcurrentReplicaNo]) { case LqhTransConf::InvalidStatus: jam(); @@ -7696,7 +7764,7 @@ void Dbtc::toCommitHandlingLab(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -7783,7 +7851,7 @@ void Dbtc::execCOMPLETECONF(Signal* signal) warningReport(signal, 14); return; }//if - arrGuard(apiConnectptr.p->currentReplicaNo, 4); + arrGuard(apiConnectptr.p->currentReplicaNo, MAX_REPLICAS); if (tcConnectptr.p->tcNodedata[apiConnectptr.p->currentReplicaNo] != tnodeid) { warningReport(signal, 15); @@ -7791,7 +7859,7 @@ void Dbtc::execCOMPLETECONF(Signal* signal) }//if if (ERROR_INSERTED(8028)) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if tcConnectptr.p->tcConnectstate = OS_COMPLETED; tcurrentReplicaNo = (Uint8)Z8NIL; @@ -7803,7 +7871,7 @@ void Dbtc::toCompleteHandlingLab(Signal* signal) do { if (tcurrentReplicaNo != (Uint8)Z8NIL) { jam(); - arrGuard(tcurrentReplicaNo, 4); + arrGuard(tcurrentReplicaNo, MAX_REPLICAS); switch (tcConnectptr.p->failData[tcurrentReplicaNo]) { case LqhTransConf::InvalidStatus: jam(); @@ -7906,7 +7974,7 @@ FAF_LOOP: jam(); if (cfirstfreeApiConnectFail == RNIL) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if seizeApiConnectFail(signal); @@ -7950,7 +8018,7 @@ void Dbtc::findTcConnectFail(Signal* signal) jam(); if 
(cfirstfreeTcConnectFail == RNIL) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//if seizeTcConnectFail(signal); @@ -8010,7 +8078,7 @@ void Dbtc::initApiConnectFail(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if apiConnectptr.p->commitAckMarker = RNIL; if(LqhTransConf::getMarkerFlag(treqinfo)){ @@ -8093,6 +8161,7 @@ void Dbtc::setupFailData(Signal* signal) case OS_PREPARED: case OS_COMMITTING: jam(); + arrGuard(tcConnectptr.p->lastReplicaNo, MAX_REPLICAS); for (tindex = 0; tindex <= tcConnectptr.p->lastReplicaNo; tindex++) { jam(); /*------------------------------------------------------------------- @@ -8100,13 +8169,13 @@ void Dbtc::setupFailData(Signal* signal) * IN THIS CASE ALL LQH'S ARE PREPARED AND WAITING FOR * COMMIT/ABORT DECISION. *------------------------------------------------------------------*/ - arrGuard(tindex, 4); tcConnectptr.p->failData[tindex] = LqhTransConf::Prepared; }//for break; case OS_COMMITTED: case OS_COMPLETING: jam(); + arrGuard(tcConnectptr.p->lastReplicaNo, MAX_REPLICAS); for (tindex = 0; tindex <= tcConnectptr.p->lastReplicaNo; tindex++) { jam(); /*------------------------------------------------------------------- @@ -8114,25 +8183,24 @@ void Dbtc::setupFailData(Signal* signal) * IN THIS CASE ALL LQH'S ARE COMMITTED AND WAITING FOR * COMPLETE MESSAGE. *------------------------------------------------------------------*/ - arrGuard(tindex, 4); tcConnectptr.p->failData[tindex] = LqhTransConf::Committed; }//for break; case OS_COMPLETED: jam(); + arrGuard(tcConnectptr.p->lastReplicaNo, MAX_REPLICAS); for (tindex = 0; tindex <= tcConnectptr.p->lastReplicaNo; tindex++) { jam(); /*------------------------------------------------------------------- * KEYDATA IS USED TO KEEP AN INDICATION OF STATE IN LQH. * IN THIS CASE ALL LQH'S ARE COMPLETED. 
*-------------------------------------------------------------------*/ - arrGuard(tindex, 4); tcConnectptr.p->failData[tindex] = LqhTransConf::InvalidStatus; }//for break; default: jam(); - sendSystemError(signal); + sendSystemError(signal, __LINE__); break; }//switch if (tabortInd != ZCOMMIT_SETUP) { @@ -8219,7 +8287,7 @@ void Dbtc::updateApiStateFail(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; }//switch break; @@ -8242,7 +8310,7 @@ void Dbtc::updateApiStateFail(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; }//switch break; @@ -8252,7 +8320,7 @@ void Dbtc::updateApiStateFail(Signal* signal) case CS_FAIL_COMMITTING: case CS_FAIL_COMMITTED: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; case CS_FAIL_PREPARED: jam(); @@ -8265,7 +8333,7 @@ void Dbtc::updateApiStateFail(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; }//switch break; @@ -8274,7 +8342,7 @@ void Dbtc::updateApiStateFail(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; }//switch }//Dbtc::updateApiStateFail() @@ -8392,14 +8460,14 @@ void Dbtc::releaseAtErrorLab(Signal* signal) abortErrorLab(signal); }//Dbtc::releaseAtErrorLab() -void Dbtc::warningHandlerLab(Signal* signal) +void Dbtc::warningHandlerLab(Signal* signal, int line) { ndbassert(false); }//Dbtc::warningHandlerLab() -void Dbtc::systemErrorLab(Signal* signal) +void Dbtc::systemErrorLab(Signal* signal, int line) { - progError(0, 0); + progError(line, NDBD_EXIT_NDBREQUIRE); }//Dbtc::systemErrorLab() @@ -8500,7 +8568,7 @@ void Dbtc::systemErrorLab(Signal* signal) void Dbtc::execSCAN_TABREQ(Signal* signal) { const ScanTabReq * const scanTabReq = (ScanTabReq *)&signal->theData[0]; - const Uint32 reqinfo = scanTabReq->requestInfo; + const Uint32 ri = scanTabReq->requestInfo; const 
Uint32 aiLength = (scanTabReq->attrLenKeyLen & 0xFFFF); const Uint32 keyLen = scanTabReq->attrLenKeyLen >> 16; const Uint32 schemaVersion = scanTabReq->tableSchemaVersion; @@ -8510,8 +8578,8 @@ void Dbtc::execSCAN_TABREQ(Signal* signal) const Uint32 buddyPtr = (tmpXX == 0xFFFFFFFF ? RNIL : tmpXX); Uint32 currSavePointId = 0; - Uint32 scanConcurrency = scanTabReq->getParallelism(reqinfo); - Uint32 noOprecPerFrag = ScanTabReq::getScanBatch(reqinfo); + Uint32 scanConcurrency = scanTabReq->getParallelism(ri); + Uint32 noOprecPerFrag = ScanTabReq::getScanBatch(ri); Uint32 scanParallel = scanConcurrency; Uint32 errCode; ScanRecordPtr scanptr; @@ -8529,7 +8597,7 @@ void Dbtc::execSCAN_TABREQ(Signal* signal) if (apiConnectptr.i >= capiConnectFilesize) { jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//if @@ -8596,6 +8664,8 @@ void Dbtc::execSCAN_TABREQ(Signal* signal) seizeCacheRecord(signal); cachePtr.p->keylen = keyLen; cachePtr.p->save1 = 0; + cachePtr.p->distributionKey = scanTabReq->distributionKey; + cachePtr.p->distributionKeyIndicator= ScanTabReq::getDistributionKeyFlag(ri); scanptr = seizeScanrec(signal); ndbrequire(transP->apiScanRec == RNIL); @@ -8682,6 +8752,7 @@ void Dbtc::initScanrec(ScanRecordPtr scanptr, UintR scanParallel, UintR noOprecPerFrag) { + const UintR ri = scanTabReq->requestInfo; scanptr.p->scanTcrec = tcConnectptr.i; scanptr.p->scanApiRec = apiConnectptr.i; scanptr.p->scanAiLength = scanTabReq->attrLenKeyLen & 0xFFFF; @@ -8694,12 +8765,13 @@ void Dbtc::initScanrec(ScanRecordPtr scanptr, scanptr.p->batch_size_rows = noOprecPerFrag; Uint32 tmp = 0; - const UintR ri = scanTabReq->requestInfo; ScanFragReq::setLockMode(tmp, ScanTabReq::getLockMode(ri)); ScanFragReq::setHoldLockFlag(tmp, ScanTabReq::getHoldLockFlag(ri)); ScanFragReq::setKeyinfoFlag(tmp, ScanTabReq::getKeyinfoFlag(ri)); ScanFragReq::setReadCommittedFlag(tmp,ScanTabReq::getReadCommittedFlag(ri)); ScanFragReq::setRangeScanFlag(tmp, 
ScanTabReq::getRangeScanFlag(ri)); + ScanFragReq::setDescendingFlag(tmp, ScanTabReq::getDescendingFlag(ri)); + ScanFragReq::setTupScanFlag(tmp, ScanTabReq::getTupScanFlag(ri)); ScanFragReq::setAttrLen(tmp, scanTabReq->attrLenKeyLen & 0xFFFF); scanptr.p->scanRequestInfo = tmp; @@ -8810,14 +8882,43 @@ void Dbtc::diFcountReqLab(Signal* signal, ScanRecordPtr scanptr) return; } + scanptr.p->scanNextFragId = 0; + scanptr.p->m_booked_fragments_count= 0; scanptr.p->scanState = ScanRecord::WAIT_FRAGMENT_COUNT; - /************************************************* - * THE FIRST STEP TO RECEIVE IS SUCCESSFULLY COMPLETED. - * WE MUST FIRST GET THE NUMBER OF FRAGMENTS IN THE TABLE. - ***************************************************/ - signal->theData[0] = tcConnectptr.p->dihConnectptr; - signal->theData[1] = scanptr.p->scanTableref; - sendSignal(cdihblockref, GSN_DI_FCOUNTREQ, signal, 2, JBB); + + if(!cachePtr.p->distributionKeyIndicator) + { + jam(); + /************************************************* + * THE FIRST STEP TO RECEIVE IS SUCCESSFULLY COMPLETED. + * WE MUST FIRST GET THE NUMBER OF FRAGMENTS IN THE TABLE. 
+ ***************************************************/ + signal->theData[0] = tcConnectptr.p->dihConnectptr; + signal->theData[1] = scanptr.p->scanTableref; + sendSignal(cdihblockref, GSN_DI_FCOUNTREQ, signal, 2, JBB); + } + else + { + signal->theData[0] = tcConnectptr.p->dihConnectptr; + signal->theData[1] = tabPtr.i; + signal->theData[2] = cachePtr.p->distributionKey; + EXECUTE_DIRECT(DBDIH, GSN_DIGETNODESREQ, signal, 3); + UintR TerrorIndicator = signal->theData[0]; + jamEntry(); + if (TerrorIndicator != 0) { + signal->theData[0] = tcConnectptr.i; + //signal->theData[1] Contains error + execDI_FCOUNTREF(signal); + return; + } + + UintR Tdata1 = signal->theData[1]; + scanptr.p->scanNextFragId = Tdata1; + + signal->theData[0] = tcConnectptr.i; + signal->theData[1] = 1; // Frag count + execDI_FCOUNTCONF(signal); + } return; }//Dbtc::diFcountReqLab() @@ -8834,7 +8935,7 @@ void Dbtc::execDI_FCOUNTCONF(Signal* signal) { jamEntry(); tcConnectptr.i = signal->theData[0]; - const UintR tfragCount = signal->theData[1]; + Uint32 tfragCount = signal->theData[1]; ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); apiConnectptr.i = tcConnectptr.p->apiConnect; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); @@ -8869,24 +8970,17 @@ void Dbtc::execDI_FCOUNTCONF(Signal* signal) return; } - if(scanptr.p->scanParallel > tfragCount){ - jam(); - abortScanLab(signal, scanptr, ZTOO_HIGH_CONCURRENCY_ERROR); - return; - } - scanptr.p->scanParallel = tfragCount; scanptr.p->scanNoFrag = tfragCount; - scanptr.p->scanNextFragId = 0; scanptr.p->scanState = ScanRecord::RUNNING; setApiConTimer(apiConnectptr.i, 0, __LINE__); updateBuddyTimer(apiConnectptr); ScanFragRecPtr ptr; - ScanFragList list(c_scan_frag_pool, - scanptr.p->m_running_scan_frags); - for (list.first(ptr); !ptr.isNull(); list.next(ptr)){ + ScanFragList list(c_scan_frag_pool, scanptr.p->m_running_scan_frags); + for (list.first(ptr); !ptr.isNull() && tfragCount; + list.next(ptr), tfragCount--){ 
jam(); ptr.p->lqhBlockref = 0; @@ -8901,6 +8995,22 @@ void Dbtc::execDI_FCOUNTCONF(Signal* signal) signal->theData[3] = ptr.p->scanFragId; sendSignal(cdihblockref, GSN_DIGETPRIMREQ, signal, 4, JBB); }//for + + ScanFragList queued(c_scan_frag_pool, scanptr.p->m_queued_scan_frags); + for (; !ptr.isNull();) + { + ptr.p->m_ops = 0; + ptr.p->m_totalLen = 0; + ptr.p->m_scan_frag_conf_status = 1; + ptr.p->scanFragState = ScanFragRec::QUEUED_FOR_DELIVERY; + ptr.p->stopFragTimer(); + + ScanFragRecPtr tmp = ptr; + list.next(ptr); + list.remove(tmp); + queued.add(tmp); + scanptr.p->m_queued_count++; + } }//Dbtc::execDI_FCOUNTCONF() /****************************************************** @@ -9141,7 +9251,7 @@ void Dbtc::execSCAN_FRAGREF(Signal* signal) transid1 = transid1 | transid2; if (transid1 != 0) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if /** @@ -9237,7 +9347,7 @@ void Dbtc::execSCAN_FRAGCONF(Signal* signal) transid1 = transid1 | transid2; if (transid1 != 0) { jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); }//if ndbrequire(scanFragptr.p->scanFragState == ScanFragRec::LQH_ACTIVE); @@ -9262,7 +9372,7 @@ void Dbtc::execSCAN_FRAGCONF(Signal* signal) } if(noCompletedOps == 0 && status != 0 && - scanptr.p->scanNextFragId < scanptr.p->scanNoFrag){ + scanptr.p->scanNextFragId+scanptr.p->m_booked_fragments_count < scanptr.p->scanNoFrag){ /** * Start on next fragment */ @@ -9325,7 +9435,7 @@ void Dbtc::execSCAN_NEXTREQ(Signal* signal) apiConnectptr.i = req->apiConnectPtr; if (apiConnectptr.i >= capiConnectFilesize) { jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//if ptrAss(apiConnectptr, apiConnectRecord); @@ -9432,6 +9542,9 @@ void Dbtc::execSCAN_NEXTREQ(Signal* signal) */ jam(); ndbrequire(scanptr.p->scanNextFragId < scanptr.p->scanNoFrag); + jam(); + ndbassert(scanptr.p->m_booked_fragments_count); + scanptr.p->m_booked_fragments_count--; scanFragptr.p->scanFragState = 
ScanFragRec::WAIT_GET_PRIMCONF; tcConnectptr.i = scanptr.p->scanTcrec; @@ -9673,8 +9786,9 @@ void Dbtc::sendScanTabConf(Signal* signal, ScanRecordPtr scanPtr) { jam(); ops += 21; } - - Uint32 left = scanPtr.p->scanNoFrag - scanPtr.p->scanNextFragId; + + int left = scanPtr.p->scanNoFrag - scanPtr.p->scanNextFragId; + Uint32 booked = scanPtr.p->m_booked_fragments_count; ScanTabConf * conf = (ScanTabConf*)&signal->theData[0]; conf->apiConnectPtr = apiConnectptr.p->ndbapiConnect; @@ -9690,8 +9804,10 @@ void Dbtc::sendScanTabConf(Signal* signal, ScanRecordPtr scanPtr) { ScanFragRecPtr curr = ptr; // Remove while iterating... queued.next(ptr); - bool done = curr.p->m_scan_frag_conf_status && --left; - + bool done = curr.p->m_scan_frag_conf_status && (left <= (int)booked); + if(curr.p->m_scan_frag_conf_status) + booked++; + * ops++ = curr.p->m_apiPtr; * ops++ = done ? RNIL : curr.i; * ops++ = (curr.p->m_totalLen << 10) + curr.p->m_ops; @@ -9709,8 +9825,10 @@ void Dbtc::sendScanTabConf(Signal* signal, ScanRecordPtr scanPtr) { } } + scanPtr.p->m_booked_fragments_count = booked; if(scanPtr.p->m_delivered_scan_frags.isEmpty() && - scanPtr.p->m_running_scan_frags.isEmpty()){ + scanPtr.p->m_running_scan_frags.isEmpty()) + { conf->requestInfo = op_count | ScanTabConf::EndOfData; releaseScanResources(scanPtr); } @@ -9969,7 +10087,7 @@ void Dbtc::initialiseRecordsLab(Signal* signal, UintR Tdata0, break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -10025,6 +10143,9 @@ void Dbtc::initTable(Signal* signal) tabptr.p->tableType = 0; tabptr.p->enabled = false; tabptr.p->dropping = false; + tabptr.p->noOfKeyAttr = 0; + tabptr.p->hasCharAttr = 0; + tabptr.p->noOfDistrKeys = 0; }//for }//Dbtc::initTable() @@ -10204,7 +10325,7 @@ void Dbtc::releaseAbortResources(Signal* signal) if(!ok){ jam(); ndbout_c("returnsignal = %d", apiConnectptr.p->returnsignal); - sendSystemError(signal); + sendSystemError(signal, __LINE__); }//if } @@ 
-10432,9 +10553,9 @@ void Dbtc::sendKeyinfo(Signal* signal, BlockReference TBRef, Uint32 len) sendSignal(TBRef, GSN_KEYINFO, signal, 3 + len, JBB); }//Dbtc::sendKeyinfo() -void Dbtc::sendSystemError(Signal* signal) +void Dbtc::sendSystemError(Signal* signal, int line) { - progError(0, 0); + progError(line, NDBD_EXIT_NDBREQUIRE); }//Dbtc::sendSystemError() /* ========================================================================= */ @@ -10455,7 +10576,7 @@ void Dbtc::unlinkGcp(Signal* signal) * WE ARE TRYING TO REMOVE A GLOBAL CHECKPOINT WHICH WAS NOT THE OLDEST. * THIS IS A SYSTEM ERROR. * ------------------------------------------------------------------- */ - sendSystemError(signal); + sendSystemError(signal, __LINE__); }//if gcpPtr.p->nextGcp = cfirstfreeGcp; cfirstfreeGcp = gcpPtr.i; @@ -11228,7 +11349,7 @@ void Dbtc::execTCINDXREQ(Signal* signal) { jamEntry(); - TcIndxReq * const tcIndxReq = (TcIndxReq *)signal->getDataPtr(); + TcKeyReq * const tcIndxReq = (TcKeyReq *)signal->getDataPtr(); const UintR TapiIndex = tcIndxReq->apiConnectPtr; Uint32 tcIndxRequestInfo = tcIndxReq->requestInfo; Uint32 startFlag = tcIndxReq->getStartFlag(tcIndxRequestInfo); @@ -11239,7 +11360,7 @@ void Dbtc::execTCINDXREQ(Signal* signal) transPtr.i = TapiIndex; if (transPtr.i >= capiConnectFilesize) { jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//if ptrAss(transPtr, apiConnectRecord); @@ -11279,7 +11400,7 @@ void Dbtc::execTCINDXREQ(Signal* signal) // If operation is readTupleExclusive or updateTuple then read index // table with exclusive lock - Uint32 indexLength = TcIndxReq::getIndexLength(tcIndxRequestInfo); + Uint32 indexLength = TcKeyReq::getKeyLength(tcIndxRequestInfo); Uint32 attrLength = tcIndxReq->attrLen; indexOp->expectedKeyInfo = indexLength; Uint32 includedIndexLength = MIN(indexLength, indexBufSize); @@ -11393,7 +11514,7 @@ void Dbtc::execINDXKEYINFO(Signal* signal) transPtr.i = TconnectIndex; if (transPtr.i >= 
capiConnectFilesize) { jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//if ptrAss(transPtr, apiConnectRecord); @@ -11426,7 +11547,7 @@ void Dbtc::execINDXATTRINFO(Signal* signal) transPtr.i = TconnectIndex; if (transPtr.i >= capiConnectFilesize) { jam(); - warningHandlerLab(signal); + warningHandlerLab(signal, __LINE__); return; }//if ptrAss(transPtr, apiConnectRecord); @@ -11584,14 +11705,14 @@ void Dbtc::execTCKEYCONF(Signal* signal) case(IOS_NOOP): { jam(); // Should never happen, abort - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndxRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = regApiPtr->transid[1]; tcIndxRef->errorCode = 4349; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); return; } case(IOS_INDEX_ACCESS): { @@ -11603,14 +11724,14 @@ void Dbtc::execTCKEYCONF(Signal* signal) case(IOS_INDEX_ACCESS_WAIT_FOR_TRANSID_AI): { jam(); // Double TCKEYCONF, should never happen, abort - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndxRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = regApiPtr->transid[1]; tcIndxRef->errorCode = 4349; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); return; } case(IOS_INDEX_ACCESS_WAIT_FOR_TCKEYCONF): { @@ -11696,8 +11817,8 @@ void Dbtc::execTCKEYREF(Signal* signal) // Send TCINDXREF jam(); - TcIndxReq * const tcIndxReq = &indexOp->tcIndxReq; - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyReq * const tcIndxReq = &indexOp->tcIndxReq; + TcKeyRef * const tcIndxRef = (TcKeyRef 
*)signal->getDataPtrSend(); ndbassert(regApiPtr->noIndexOp); regApiPtr->noIndexOp--; // Decrease count @@ -11706,7 +11827,7 @@ void Dbtc::execTCKEYREF(Signal* signal) tcIndxRef->transId[1] = tcKeyRef->transId[1]; tcIndxRef->errorCode = tcKeyRef->errorCode; sendSignal(regApiPtr->ndbapiBlockref, - GSN_TCINDXREF, signal, TcIndxRef::SignalLength, JBB); + GSN_TCINDXREF, signal, TcKeyRef::SignalLength, JBB); return; } } @@ -11773,14 +11894,14 @@ void Dbtc::execTRANSID_AI(Signal* signal) signal->getLength() - TransIdAI::HeaderLength)) { jam(); // Failed to allocate space for TransIdAI - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndxRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = regApiPtr->transid[1]; tcIndxRef->errorCode = 4000; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); return; } @@ -11788,14 +11909,14 @@ void Dbtc::execTRANSID_AI(Signal* signal) case(IOS_NOOP): { jam(); // Should never happen, abort - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndxRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = regApiPtr->transid[1]; tcIndxRef->errorCode = 4349; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); return; break; } @@ -11816,14 +11937,14 @@ void Dbtc::execTRANSID_AI(Signal* signal) #endif /* // Too many TRANSID_AI - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndexRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = 
regApiPtr->transid[1]; tcIndxRef->errorCode = 4349; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); */ break; } @@ -11841,14 +11962,14 @@ void Dbtc::execTRANSID_AI(Signal* signal) case(IOS_INDEX_OPERATION): { // Should never receive TRANSID_AI in this state!! jam(); - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndxRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = regApiPtr->transid[1]; tcIndxRef->errorCode = 4349; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); return; } } @@ -11891,24 +12012,24 @@ void Dbtc::readIndexTable(Signal* signal, (Operation_t)TcKeyReq::getOperationType(tcKeyRequestInfo); // Find index table - if ((indexData = c_theIndexes.getPtr(indexOp->tcIndxReq.indexId)) == NULL) { + if ((indexData = c_theIndexes.getPtr(indexOp->tcIndxReq.tableId)) == NULL) { jam(); // Failed to find index record - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndxRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = regApiPtr->transid[1]; tcIndxRef->errorCode = 4000; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); return; } tcKeyReq->transId1 = transId1; tcKeyReq->transId2 = transId2; tcKeyReq->tableId = indexData->indexId; tcKeyLength += MIN(keyLength, keyBufSize); - tcKeyReq->tableSchemaVersion = indexOp->tcIndxReq.indexSchemaVersion; + tcKeyReq->tableSchemaVersion = indexOp->tcIndxReq.tableSchemaVersion; TcKeyReq::setOperationType(tcKeyRequestInfo, opType == ZREAD ? 
ZREAD : ZREAD_EX); TcKeyReq::setAIInTcKeyReq(tcKeyRequestInfo, 1); // Allways send one AttrInfo @@ -12022,7 +12143,7 @@ void Dbtc::executeIndexOperation(Signal* signal, Uint32 keyBufSize = 8; // Maximum for key in TCKEYREQ Uint32 attrBufSize = 5; Uint32 dataPos = 0; - TcIndxReq * const tcIndxReq = &indexOp->tcIndxReq; + TcKeyReq * const tcIndxReq = &indexOp->tcIndxReq; TcKeyReq * const tcKeyReq = (TcKeyReq *)signal->getDataPtrSend(); Uint32 * dataPtr = &tcKeyReq->scanInfo; Uint32 tcKeyLength = TcKeyReq::StaticLength; @@ -12033,17 +12154,17 @@ void Dbtc::executeIndexOperation(Signal* signal, bool moreKeyData = indexOp->transIdAI.first(aiIter); // Find index table - if ((indexData = c_theIndexes.getPtr(tcIndxReq->indexId)) == NULL) { + if ((indexData = c_theIndexes.getPtr(tcIndxReq->tableId)) == NULL) { jam(); // Failed to find index record - TcIndxRef * const tcIndxRef = (TcIndxRef *)signal->getDataPtrSend(); + TcKeyRef * const tcIndxRef = (TcKeyRef *)signal->getDataPtrSend(); tcIndxRef->connectPtr = indexOp->tcIndxReq.senderData; tcIndxRef->transId[0] = regApiPtr->transid[0]; tcIndxRef->transId[1] = regApiPtr->transid[1]; tcIndxRef->errorCode = 4349; sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, - TcIndxRef::SignalLength, JBB); + TcKeyRef::SignalLength, JBB); return; } // Find schema version of primary table diff --git a/ndb/src/kernel/blocks/dbtup/Dbtup.hpp b/ndb/src/kernel/blocks/dbtup/Dbtup.hpp index 360710d543b..0b66d9a45bb 100644 --- a/ndb/src/kernel/blocks/dbtup/Dbtup.hpp +++ b/ndb/src/kernel/blocks/dbtup/Dbtup.hpp @@ -64,6 +64,7 @@ // DbtupSystemRestart.cpp 26000 // DbtupIndex.cpp 28000 // DbtupDebug.cpp 30000 +// DbtupScan.cpp 32000 //------------------------------------------------------------------ /* @@ -207,6 +208,8 @@ #define ZTUPLE_DELETED_ERROR 626 #define ZINSERT_ERROR 630 +#define ZINVALID_CHAR_FORMAT 744 + /* SOME WORD POSITIONS OF FIELDS IN SOME HEADERS */ #define ZPAGE_STATE_POS 0 /* POSITION OF PAGE STATE */ @@ -493,26 +496,73 @@ 
struct DiskBufferSegmentInfo { typedef Ptr<DiskBufferSegmentInfo> DiskBufferSegmentInfoPtr; struct Fragoperrec { - bool definingFragment; + Uint64 minRows; + Uint64 maxRows; Uint32 nextFragoprec; Uint32 lqhPtrFrag; Uint32 fragidFrag; Uint32 tableidFrag; Uint32 fragPointer; Uint32 attributeCount; - Uint32 freeNullBit; + Uint32 currNullBit; + Uint32 noOfNullBits; Uint32 noOfNewAttrCount; Uint32 charsetIndex; BlockReference lqhBlockrefFrag; bool inUse; + bool definingFragment; }; typedef Ptr<Fragoperrec> FragoperrecPtr; + // Position for use by scan + struct PagePos { + Uint32 m_fragId; // "base" fragment id + Uint32 m_fragBit; // two fragments in 5.0 + Uint32 m_pageId; + Uint32 m_tupleNo; + bool m_match; + }; + + // Tup scan op (compare Dbtux::ScanOp) + struct ScanOp { + enum { + Undef = 0, + First = 1, // before first entry + Locked = 4, // at current entry (no lock needed) + Next = 5, // looking for next extry + Last = 6, // after last entry + Invalid = 9 // cannot return REF to LQH currently + }; + Uint16 m_state; + Uint16 m_lockwait; // unused + Uint32 m_userPtr; // scanptr.i in LQH + Uint32 m_userRef; + Uint32 m_tableId; + Uint32 m_fragId; // "base" fragment id + Uint32 m_fragPtrI[2]; + Uint32 m_transId1; + Uint32 m_transId2; + PagePos m_scanPos; + union { + Uint32 nextPool; + Uint32 nextList; + }; + Uint32 prevList; + }; + typedef Ptr<ScanOp> ScanOpPtr; + ArrayPool<ScanOp> c_scanOpPool; + + void scanFirst(Signal* signal, ScanOpPtr scanPtr); + void scanNext(Signal* signal, ScanOpPtr scanPtr); + void scanClose(Signal* signal, ScanOpPtr scanPtr); + void releaseScanOp(ScanOpPtr& scanPtr); + struct Fragrecord { Uint32 nextStartRange; Uint32 currentPageRange; Uint32 rootPageRange; Uint32 noOfPages; + Uint32 noOfPagesToGrow; Uint32 emptyPrimPage; Uint32 firstusedOprec; @@ -529,6 +579,9 @@ struct Fragrecord { Uint32 fragTableId; Uint32 fragmentId; Uint32 nextfreefrag; + + DLList<ScanOp> m_scanList; + Fragrecord(ArrayPool<ScanOp> & scanOpPool) : m_scanList(scanOpPool) 
{} }; typedef Ptr<Fragrecord> FragrecordPtr; @@ -1019,7 +1072,14 @@ public: * for md5 summing and when returning keyinfo. Returns number of * words or negative (-terrorCode) on error. */ - int tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* dataOut); + int tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* dataOut, bool xfrmFlag); + + /* + * ACC reads primary key without headers into an array of words. At + * this point in ACC deconstruction, ACC still uses logical references + * to fragment and tuple. + */ + int accReadPk(Uint32 tableId, Uint32 fragId, Uint32 fragPageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag); /* * TUX checks if tuple is visible to scan. @@ -1069,6 +1129,11 @@ private: void buildIndex(Signal* signal, Uint32 buildPtrI); void buildIndexReply(Signal* signal, const BuildIndexRec* buildRec); + // Tup scan + void execACC_SCANREQ(Signal* signal); + void execNEXT_SCANREQ(Signal* signal); + void execACC_CHECK_SCAN(Signal* signal); + //------------------------------------------------------------------ //------------------------------------------------------------------ // Methods to handle execution of TUPKEYREQ + ATTRINFO. 
@@ -1618,19 +1683,11 @@ private: Uint32 attrDescriptor, Uint32 attrDes2); -// ***************************************************************** -// Read char routines optionally (tXfrmFlag) apply strxfrm -// ***************************************************************** - - bool readCharNotNULL(Uint32* outBuffer, - AttributeHeader* ahOut, - Uint32 attrDescriptor, - Uint32 attrDes2); - bool readCharNULLable(Uint32* outBuffer, - AttributeHeader* ahOut, - Uint32 attrDescriptor, - Uint32 attrDes2); + bool readBitsNULLable(Uint32* outBuffer, AttributeHeader*, Uint32, Uint32); + bool updateBitsNULLable(Uint32* inBuffer, Uint32, Uint32); + bool readBitsNotNULL(Uint32* outBuffer, AttributeHeader*, Uint32, Uint32); + bool updateBitsNotNULL(Uint32* inBuffer, Uint32, Uint32); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -1737,8 +1794,7 @@ private: Uint32* const mainBuffer, Uint32& noMainWords, Uint32* const copyBuffer, - Uint32& noCopyWords, - bool xfrm); + Uint32& noCopyWords); void sendTrigAttrInfo(Signal* signal, Uint32* data, diff --git a/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp b/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp index 808cfd33696..8c43de52a75 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp @@ -68,7 +68,7 @@ struct Chunk { void Dbtup::reportMemoryUsage(Signal* signal, int incDec){ - signal->theData[0] = EventReport::MemoryUsage; + signal->theData[0] = NDB_LE_MemoryUsage; signal->theData[1] = incDec; signal->theData[2] = sizeof(Page); signal->theData[3] = cnoOfAllocatedPages; diff --git a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp index 8171fa65771..f83f21f14d8 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp @@ -858,6 +858,8 @@ void Dbtup::sendTUPKEYCONF(Signal* signal, return; 
}//Dbtup::sendTUPKEYCONF() +#define MAX_READ (sizeof(signal->theData) > MAX_MESSAGE_SIZE ? MAX_MESSAGE_SIZE : sizeof(signal->theData)) + /* ---------------------------------------------------------------- */ /* ----------------------------- READ ---------------------------- */ /* ---------------------------------------------------------------- */ @@ -878,7 +880,7 @@ int Dbtup::handleReadReq(Signal* signal, }//if Uint32 * dst = &signal->theData[25]; - Uint32 dstLen = (sizeof(signal->theData) / 4) - 25; + Uint32 dstLen = (MAX_READ / 4) - 25; const Uint32 node = refToNode(sendBref); if(node != 0 && node != getOwnNodeId()) { ; @@ -888,7 +890,7 @@ int Dbtup::handleReadReq(Signal* signal, * execute direct */ dst = &signal->theData[3]; - dstLen = (sizeof(signal->theData) / 4) - 3; + dstLen = (MAX_READ / 4) - 3; } if (regOperPtr->interpretedExec != 1) { @@ -1228,7 +1230,7 @@ int Dbtup::interpreterStartLab(Signal* signal, const BlockReference sendBref = regOperPtr->recBlockref; Uint32 * dst = &signal->theData[25]; - Uint32 dstLen = (sizeof(signal->theData) / 4) - 25; + Uint32 dstLen = (MAX_READ / 4) - 25; const Uint32 node = refToNode(sendBref); if(node != 0 && node != getOwnNodeId()) { ; @@ -1238,7 +1240,7 @@ int Dbtup::interpreterStartLab(Signal* signal, * execute direct */ dst = &signal->theData[3]; - dstLen = (sizeof(signal->theData) / 4) - 3; + dstLen = (MAX_READ / 4) - 3; } RtotalLen = RinitReadLen; @@ -1539,13 +1541,8 @@ int Dbtup::interpreterNextLab(Signal* signal, // Calculate the number of words of this attribute. // We allow writes into arrays as long as they fit into the 64 bit // register size. - //TEST_MR See to that TattrNoOfWords can be - // read faster from attribute description. 
/* --------------------------------------------------------------- */ - Uint32 TarraySize = (TattrDesc1 >> 16); - Uint32 TattrLogLen = (TattrDesc1 >> 4) & 0xf; - Uint32 TattrNoOfBits = TarraySize << TattrLogLen; - Uint32 TattrNoOfWords = (TattrNoOfBits + 31) >> 5; + Uint32 TattrNoOfWords = AttributeDescriptor::getSizeInWords(TattrDesc1); Uint32 Toptype = operPtr.p->optype; Uint32 TdataForUpdate[3]; @@ -1827,9 +1824,6 @@ int Dbtup::interpreterNextLab(Signal* signal, case Interpreter::BRANCH_ATTR_OP_ARG:{ jam(); Uint32 cond = Interpreter::getBinaryCondition(theInstruction); - Uint32 diff = Interpreter::getArrayLengthDiff(theInstruction); - Uint32 vchr = Interpreter::isVarchar(theInstruction); - Uint32 nopad =Interpreter::isNopad(theInstruction); Uint32 ins2 = TcurrentProgram[TprogramCounter]; Uint32 attrId = Interpreter::getBranchCol_AttrId(ins2) << 16; Uint32 argLen = Interpreter::getBranchCol_Len(ins2); @@ -1848,84 +1842,93 @@ int Dbtup::interpreterNextLab(Signal* signal, } tmpHabitant = attrId; } - - AttributeHeader ah(tmpArea[0]); + // get type + attrId >>= 16; + Uint32 TattrDescrIndex = tabptr.p->tabDescriptor + + (attrId << ZAD_LOG_SIZE); + Uint32 TattrDesc1 = tableDescriptor[TattrDescrIndex].tabDescr; + Uint32 TattrDesc2 = tableDescriptor[TattrDescrIndex+1].tabDescr; + Uint32 typeId = AttributeDescriptor::getType(TattrDesc1); + void * cs = 0; + if(AttributeOffset::getCharsetFlag(TattrDesc2)) + { + Uint32 pos = AttributeOffset::getCharsetPos(TattrDesc2); + cs = tabptr.p->charsetArray[pos]; + } + const NdbSqlUtil::Type& sqlType = NdbSqlUtil::getType(typeId); + + // get data + AttributeHeader ah(tmpArea[0]); const char* s1 = (char*)&tmpArea[1]; const char* s2 = (char*)&TcurrentProgram[TprogramCounter+1]; - Uint32 attrLen = (4 * ah.getDataSize()) - diff; - if (vchr) { -#if NDB_VERSION_MAJOR >= 3 - bool vok = false; - if (attrLen >= 2) { - Uint32 vlen = (s1[0] << 8) | s1[1]; // big-endian - s1 += 2; - attrLen -= 2; - if (attrLen >= vlen) { - attrLen = vlen; - vok = 
true; - } + // fixed length in 5.0 + Uint32 attrLen = AttributeDescriptor::getSizeInBytes(TattrDesc1); + + bool r1_null = ah.isNULL(); + bool r2_null = argLen == 0; + int res1; + if (cond != Interpreter::LIKE && + cond != Interpreter::NOT_LIKE) { + if (r1_null || r2_null) { + // NULL==NULL and NULL<not-NULL + res1 = r1_null && r2_null ? 0 : r1_null ? -1 : 1; + } else { + res1 = (*sqlType.m_cmp)(cs, s1, attrLen, s2, argLen, true); } - if (!vok) { - terrorCode = ZREGISTER_INIT_ERROR; - tupkeyErrorLab(signal); - return -1; - } -#else - Uint32 tmp; - if (attrLen >= 2) { - unsigned char* ss = (unsigned char*)&s1[attrLen - 2]; - tmp = (ss[0] << 8) | ss[1]; - if (tmp <= attrLen - 2) - attrLen = tmp; + } else { + if (r1_null || r2_null) { + // NULL like NULL is true (has no practical use) + res1 = r1_null && r2_null ? 0 : -1; + } else { + res1 = (*sqlType.m_like)(cs, s1, attrLen, s2, argLen); } - // XXX handle bad data -#endif } - bool res = false; + int res = 0; switch ((Interpreter::BinaryCondition)cond) { case Interpreter::EQ: - res = NdbSqlUtil::char_compare(s1, attrLen, s2, argLen, !nopad) == 0; + res = (res1 == 0); break; case Interpreter::NE: - res = NdbSqlUtil::char_compare(s1, attrLen, s2, argLen, !nopad) != 0; + res = (res1 != 0); break; // note the condition is backwards case Interpreter::LT: - res = NdbSqlUtil::char_compare(s1, attrLen, s2, argLen, !nopad) > 0; + res = (res1 > 0); break; case Interpreter::LE: - res = NdbSqlUtil::char_compare(s1, attrLen, s2, argLen, !nopad) >= 0; + res = (res1 >= 0); break; case Interpreter::GT: - res = NdbSqlUtil::char_compare(s1, attrLen, s2, argLen, !nopad) < 0; + res = (res1 < 0); break; case Interpreter::GE: - res = NdbSqlUtil::char_compare(s1, attrLen, s2, argLen, !nopad) <= 0; + res = (res1 <= 0); break; case Interpreter::LIKE: - res = NdbSqlUtil::char_like(s1, attrLen, s2, argLen, !nopad); + res = (res1 == 0); break; case Interpreter::NOT_LIKE: - res = ! 
NdbSqlUtil::char_like(s1, attrLen, s2, argLen, !nopad); + res = (res1 == 1); break; - // XXX handle invalid value + // XXX handle invalid value } #ifdef TRACE_INTERPRETER - ndbout_c("cond=%u diff=%d vc=%d nopad=%d attr(%d) = >%.*s<(%d) str=>%.*s<(%d) -> res = %d", - cond, diff, vchr, nopad, - attrId >> 16, attrLen, s1, attrLen, argLen, s2, argLen, res); + ndbout_c("cond=%u attr(%d)='%.*s'(%d) str='%.*s'(%d) res1=%d res=%d", + cond, attrId >> 16, + attrLen, s1, attrLen, argLen, s2, argLen, res1, res); #endif if (res) TprogramCounter = brancher(theInstruction, TprogramCounter); - else { - Uint32 tmp = (Interpreter::mod4(argLen) >> 2) + 1; + else + { + Uint32 tmp = ((argLen + 3) >> 2) + 1; TprogramCounter += tmp; } break; } - + case Interpreter::BRANCH_ATTR_EQ_NULL:{ jam(); Uint32 ins2 = TcurrentProgram[TprogramCounter]; diff --git a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp index af516d53a24..66e98bd2805 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp @@ -75,25 +75,8 @@ Dbtup::Dbtup(const class Configuration & conf) c_storedProcPool(), c_buildIndexList(c_buildIndexPool) { - Uint32 log_page_size= 0; BLOCK_CONSTRUCTOR(Dbtup); - const ndb_mgm_configuration_iterator * p = conf.getOwnConfigIterator(); - ndbrequire(p != 0); - - ndb_mgm_get_int_parameter(p, CFG_DB_UNDO_DATA_BUFFER, - &log_page_size); - - /** - * Always set page size in half MBytes - */ - cnoOfUndoPage= (log_page_size / sizeof(UndoPage)); - Uint32 mega_byte_part= cnoOfUndoPage & 15; - if (mega_byte_part != 0) { - jam(); - cnoOfUndoPage+= (16 - mega_byte_part); - } - addRecSignal(GSN_DEBUG_SIG, &Dbtup::execDEBUG_SIG); addRecSignal(GSN_CONTINUEB, &Dbtup::execCONTINUEB); @@ -137,7 +120,28 @@ Dbtup::Dbtup(const class Configuration & conf) // Ordered index related addRecSignal(GSN_BUILDINDXREQ, &Dbtup::execBUILDINDXREQ); + // Tup scan + addRecSignal(GSN_ACC_SCANREQ, &Dbtup::execACC_SCANREQ); + 
addRecSignal(GSN_NEXT_SCANREQ, &Dbtup::execNEXT_SCANREQ); + addRecSignal(GSN_ACC_CHECK_SCAN, &Dbtup::execACC_CHECK_SCAN); + initData(); + + attrbufrec = 0; + checkpointInfo = 0; + diskBufferSegmentInfo = 0; + fragoperrec = 0; + fragrecord = 0; + hostBuffer = 0; + localLogInfo = 0; + operationrec = 0; + page = 0; + pageRange = 0; + pendingFileOpenInfo = 0; + restartInfoRecord = 0; + tablerec = 0; + tableDescriptor = 0; + undoPage = 0; }//Dbtup::Dbtup() Dbtup::~Dbtup() @@ -598,6 +602,20 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) theConfiguration.getOwnConfigIterator(); ndbrequire(p != 0); + Uint32 log_page_size= 0; + ndb_mgm_get_int_parameter(p, CFG_DB_UNDO_DATA_BUFFER, + &log_page_size); + + /** + * Always set page size in half MBytes + */ + cnoOfUndoPage= (log_page_size / sizeof(UndoPage)); + Uint32 mega_byte_part= cnoOfUndoPage & 15; + if (mega_byte_part != 0) { + jam(); + cnoOfUndoPage+= (16 - mega_byte_part); + } + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_FRAG, &cnoOfFragrec)); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_OP_RECS, &cnoOfOprec)); @@ -617,12 +635,19 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_TRIGGERS, &noOfTriggers)); + Uint32 nScanOp; // use TUX config for now + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUX_SCAN_OP, &nScanOp)); + + cnoOfTabDescrRec = (cnoOfTabDescrRec & 0xFFFFFFF0) + 16; + + initRecords(); + c_storedProcPool.setSize(noOfStoredProc); c_buildIndexPool.setSize(c_noOfBuildIndexRec); c_triggerPool.setSize(noOfTriggers); + c_scanOpPool.setSize(nScanOp); - initRecords(); czero = 0; cminusOne = czero - 1; clastBitMask = 1; @@ -644,7 +669,22 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) void Dbtup::initRecords() { + unsigned i; + // Records with dynamic sizes + page = (Page*)allocRecord("Page", + sizeof(Page), + cnoOfPage, + false); + + undoPage = (UndoPage*)allocRecord("UndoPage", + sizeof(UndoPage), + cnoOfUndoPage); + + operationrec = 
(Operationrec*)allocRecord("Operationrec", + sizeof(Operationrec), + cnoOfOprec); + attrbufrec = (Attrbufrec*)allocRecord("Attrbufrec", sizeof(Attrbufrec), cnoOfAttrbufrec); @@ -665,6 +705,11 @@ void Dbtup::initRecords() fragrecord = (Fragrecord*)allocRecord("Fragrecord", sizeof(Fragrecord), cnoOfFragrec); + + for (i = 0; i<cnoOfFragrec; i++) { + void * p = &fragrecord[i]; + new (p) Fragrecord(c_scanOpPool); + } hostBuffer = (HostBuffer*)allocRecord("HostBuffer", sizeof(HostBuffer), @@ -674,15 +719,6 @@ void Dbtup::initRecords() sizeof(LocalLogInfo), cnoOfParallellUndoFiles); - operationrec = (Operationrec*)allocRecord("Operationrec", - sizeof(Operationrec), - cnoOfOprec); - - page = (Page*)allocRecord("Page", - sizeof(Page), - cnoOfPage, - false); - pageRange = (PageRange*)allocRecord("PageRange", sizeof(PageRange), cnoOfPageRangeRec); @@ -702,7 +738,7 @@ void Dbtup::initRecords() sizeof(Tablerec), cnoOfTablerec); - for(unsigned i = 0; i<cnoOfTablerec; i++) { + for (i = 0; i<cnoOfTablerec; i++) { void * p = &tablerec[i]; new (p) Tablerec(c_triggerPool); } @@ -712,11 +748,6 @@ void Dbtup::initRecords() sizeof(TableDescriptor), cnoOfTabDescrRec); - undoPage = (UndoPage*)allocRecord("UndoPage", - sizeof(UndoPage), - cnoOfUndoPage); - - // Initialize BAT for interface to file system NewVARIABLE* bat = allocateBat(3); bat[1].WA = &page->pageWord[0]; diff --git a/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp b/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp index 5a8642c4d2e..ab6e0642e11 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp @@ -173,7 +173,7 @@ Dbtup::tuxReadAttrs(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32 tu } int -Dbtup::tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* dataOut) +Dbtup::tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* dataOut, bool xfrmFlag) { ljamEntry(); // use own variables instead of globals @@ -200,8 +200,7 @@ Dbtup::tuxReadPk(Uint32 
fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* data operPtr.i = RNIL; operPtr.p = NULL; // do it - int ret = readAttributes(pagePtr.p, pageOffset, attrIds, - numAttrs, dataOut, ZNIL, true); + int ret = readAttributes(pagePtr.p, pageOffset, attrIds, numAttrs, dataOut, ZNIL, xfrmFlag); // restore globals tabptr = tabptr_old; fragptr = fragptr_old; @@ -229,6 +228,27 @@ Dbtup::tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* data return ret; } +int +Dbtup::accReadPk(Uint32 tableId, Uint32 fragId, Uint32 fragPageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag) +{ + ljamEntry(); + // get table + TablerecPtr tablePtr; + tablePtr.i = tableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + // get fragment + FragrecordPtr fragPtr; + getFragmentrec(fragPtr, fragId, tablePtr.p); + // get real page id and tuple offset + PagePtr pagePtr; + Uint32 pageId = getRealpid(fragPtr.p, fragPageId); + ndbrequire((pageIndex & 0x1) == 0); + Uint32 pageOffset = ZPAGE_HEADER_SIZE + (pageIndex >> 1) * tablePtr.p->tupheadsize; + // use TUX routine - optimize later + int ret = tuxReadPk(fragPtr.i, pageId, pageOffset, dataOut, xfrmFlag); + return ret; +} + bool Dbtup::tuxQueryTh(Uint32 fragPtrI, Uint32 tupAddr, Uint32 tupVersion, Uint32 transId1, Uint32 transId2, Uint32 savePointId) { diff --git a/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp b/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp index 73165dc7738..7d2f7d56d48 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp @@ -41,7 +41,8 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) { ljamEntry(); - if (signal->theData[0] == (Uint32)-1) { + TupFragReq* tupFragReq = (TupFragReq*)signal->getDataPtr(); + if (tupFragReq->userPtr == (Uint32)-1) { ljam(); abortAddFragOp(signal); return; @@ -51,30 +52,34 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) FragrecordPtr regFragPtr; TablerecPtr regTabPtr; - Uint32 userptr = signal->theData[0]; - Uint32 userblockref = 
signal->theData[1]; - Uint32 reqinfo = signal->theData[2]; - regTabPtr.i = signal->theData[3]; - Uint32 noOfAttributes = signal->theData[4]; - Uint32 fragId = signal->theData[5]; - Uint32 noOfNullAttr = signal->theData[7]; - /* Uint32 schemaVersion = signal->theData[8];*/ - Uint32 noOfKeyAttr = signal->theData[9]; + Uint32 userptr = tupFragReq->userPtr; + Uint32 userblockref = tupFragReq->userRef; + Uint32 reqinfo = tupFragReq->reqInfo; + regTabPtr.i = tupFragReq->tableId; + Uint32 noOfAttributes = tupFragReq->noOfAttr; + Uint32 fragId = tupFragReq->fragId; + Uint32 noOfNullAttr = tupFragReq->noOfNullAttr; + /* Uint32 schemaVersion = tupFragReq->schemaVersion;*/ + Uint32 noOfKeyAttr = tupFragReq->noOfKeyAttr; - Uint32 noOfNewAttr = (signal->theData[10] & 0xFFFF); - /* DICT sends number of character sets in upper half */ - Uint32 noOfCharsets = (signal->theData[10] >> 16); + Uint32 noOfNewAttr = tupFragReq->noOfNewAttr; + Uint32 noOfCharsets = tupFragReq->noOfCharsets; - Uint32 checksumIndicator = signal->theData[11]; - Uint32 noOfAttributeGroups = signal->theData[12]; - Uint32 globalCheckpointIdIndicator = signal->theData[13]; + Uint32 checksumIndicator = tupFragReq->checksumIndicator; + Uint32 noOfAttributeGroups = tupFragReq->noOfAttributeGroups; + Uint32 globalCheckpointIdIndicator = tupFragReq->globalCheckpointIdIndicator; + + Uint64 maxRows = + (((Uint64)tupFragReq->maxRowsHigh) << 32) + tupFragReq->maxRowsLow; + Uint64 minRows = + (((Uint64)tupFragReq->minRowsHigh) << 32) + tupFragReq->minRowsLow; #ifndef VM_TRACE // config mismatch - do not crash if release compiled if (regTabPtr.i >= cnoOfTablerec) { ljam(); - signal->theData[0] = userptr; - signal->theData[1] = 800; + tupFragReq->userPtr = userptr; + tupFragReq->userRef = 800; sendSignal(userblockref, GSN_TUPFRAGREF, signal, 2, JBB); return; } @@ -83,8 +88,8 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) ptrCheckGuard(regTabPtr, cnoOfTablerec, tablerec); if (cfirstfreeFragopr == RNIL) { ljam(); - 
signal->theData[0] = userptr; - signal->theData[1] = ZNOFREE_FRAGOP_ERROR; + tupFragReq->userPtr = userptr; + tupFragReq->userRef = ZNOFREE_FRAGOP_ERROR; sendSignal(userblockref, GSN_TUPFRAGREF, signal, 2, JBB); return; }//if @@ -96,9 +101,13 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) fragOperPtr.p->fragidFrag = fragId; fragOperPtr.p->tableidFrag = regTabPtr.i; fragOperPtr.p->attributeCount = noOfAttributes; - fragOperPtr.p->freeNullBit = noOfNullAttr; + fragOperPtr.p->noOfNullBits = noOfNullAttr; fragOperPtr.p->noOfNewAttrCount = noOfNewAttr; fragOperPtr.p->charsetIndex = 0; + fragOperPtr.p->currNullBit = 0; + // remove in 5.1, 2 fragments per fragment in 5.0 + fragOperPtr.p->minRows = (minRows + 1)/2; + fragOperPtr.p->maxRows = (maxRows + 1)/2; ndbrequire(reqinfo == ZADDFRAG); @@ -140,16 +149,6 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) regFragPtr.p->fragmentId = fragId; regFragPtr.p->checkpointVersion = RNIL; - Uint32 noAllocatedPages = 2; - noAllocatedPages = allocFragPages(regFragPtr.p, noAllocatedPages); - - if (noAllocatedPages == 0) { - ljam(); - terrorCode = ZNO_PAGES_ALLOCATED_ERROR; - fragrefuse3Lab(signal, fragOperPtr, regFragPtr, regTabPtr.p, fragId); - return; - }//if - if (ERROR_INSERTED(4007) && regTabPtr.p->fragid[0] == fragId || ERROR_INSERTED(4008) && regTabPtr.p->fragid[1] == fragId) { ljam(); @@ -287,8 +286,7 @@ void Dbtup::execTUP_ADD_ATTRREQ(Signal* signal) ptrCheckGuard(fragOperPtr, cnoOfFragoprec, fragoperrec); Uint32 attrId = signal->theData[2]; Uint32 attrDescriptor = signal->theData[3]; - // DICT sends extended type (ignored) and charset number - Uint32 extType = (signal->theData[4] & 0xFF); + // DICT sends charset number in upper half Uint32 csNumber = (signal->theData[4] >> 16); regTabPtr.i = fragOperPtr.p->tableidFrag; @@ -309,13 +307,13 @@ void Dbtup::execTUP_ADD_ATTRREQ(Signal* signal) Uint32 firstTabDesIndex = regTabPtr.p->tabDescriptor + (attrId * ZAD_SIZE); setTabDescrWord(firstTabDesIndex, attrDescriptor); Uint32 attrLen = 
AttributeDescriptor::getSize(attrDescriptor); - Uint32 nullBitPos = 0; /* Default pos for NOT NULL attributes */ + Uint32 nullBitPos = fragOperPtr.p->currNullBit; + Uint32 bitCount = 0; + if (AttributeDescriptor::getNullable(attrDescriptor)) { if (!AttributeDescriptor::getDynamic(attrDescriptor)) { - ljam(); /* NULL ATTR */ - fragOperPtr.p->freeNullBit--; /* STORE NULL BIT POSTITION */ - nullBitPos = fragOperPtr.p->freeNullBit; - ndbrequire(fragOperPtr.p->freeNullBit < ZNIL); /* Check not below zero */ + ljam(); /* NULL ATTR */ + fragOperPtr.p->currNullBit++; }//if } else { ljam(); @@ -331,27 +329,40 @@ void Dbtup::execTUP_ADD_ATTRREQ(Signal* signal) case 2: { ljam(); - Uint32 bitsUsed = AttributeDescriptor::getArraySize(attrDescriptor) * (1 << attrLen); - regTabPtr.p->tupheadsize += ((bitsUsed + 31) >> 5); - break; + if(attrLen != 0) + { + ljam(); + Uint32 bitsUsed = + AttributeDescriptor::getArraySize(attrDescriptor) * (1 << attrLen); + regTabPtr.p->tupheadsize += ((bitsUsed + 31) >> 5); + break; + } + else + { + ljam(); + bitCount = AttributeDescriptor::getArraySize(attrDescriptor); + fragOperPtr.p->currNullBit += bitCount; + break; + } } default: ndbrequire(false); break; }//switch + if(nullBitPos + bitCount + 1 >= MAX_NULL_BITS) + { + terrorCode = TupAddAttrRef::TooManyBitsUsed; + addattrrefuseLab(signal, regFragPtr, fragOperPtr, regTabPtr.p, fragId); + return; + } AttributeOffset::setOffset(attrDes2, attributePos); AttributeOffset::setNullFlagPos(attrDes2, nullBitPos); } else { ndbrequire(false); }//if if (csNumber != 0) { - CHARSET_INFO* cs = get_charset(csNumber, MYF(0)); - if (cs == NULL) { - ljam(); - terrorCode = TupAddAttrRef::InvalidCharset; - addattrrefuseLab(signal, regFragPtr, fragOperPtr, regTabPtr.p, fragId); - return; - } + CHARSET_INFO* cs = all_charsets[csNumber]; + ndbrequire(cs != NULL); Uint32 i = 0; while (i < fragOperPtr.p->charsetIndex) { ljam(); @@ -375,7 +386,9 @@ void Dbtup::execTUP_ADD_ATTRREQ(Signal* signal) addattrrefuseLab(signal, 
regFragPtr, fragOperPtr, regTabPtr.p, fragId); return; }//if - if (lastAttr && (fragOperPtr.p->freeNullBit != 0)) { + if (lastAttr && + (fragOperPtr.p->currNullBit != fragOperPtr.p->noOfNullBits)) + { ljam(); terrorCode = ZINCONSISTENT_NULL_ATTRIBUTE_COUNT; addattrrefuseLab(signal, regFragPtr, fragOperPtr, regTabPtr.p, fragId); @@ -392,6 +405,27 @@ void Dbtup::execTUP_ADD_ATTRREQ(Signal* signal) CLEAR_ERROR_INSERT_VALUE; return; } + + if (lastAttr) + { + ljam(); + Uint32 noRowsPerPage = ZWORDS_ON_PAGE/regTabPtr.p->tupheadsize; + Uint32 noAllocatedPages = + (fragOperPtr.p->minRows + noRowsPerPage - 1 )/ noRowsPerPage; + if (fragOperPtr.p->minRows == 0) + noAllocatedPages = 2; + else if (noAllocatedPages == 0) + noAllocatedPages = 2; + noAllocatedPages = allocFragPages(regFragPtr.p, noAllocatedPages); + + if (noAllocatedPages == 0) { + ljam(); + terrorCode = ZNO_PAGES_ALLOCATED_ERROR; + addattrrefuseLab(signal, regFragPtr, fragOperPtr, regTabPtr.p, fragId); + return; + }//if + } + /* **************************************************************** */ /* ************** TUP_ADD_ATTCONF ****************** */ /* **************************************************************** */ diff --git a/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp b/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp index 1f674876642..acdb73704cb 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp @@ -332,6 +332,7 @@ void Dbtup::initFragRange(Fragrecord* const regFragPtr) regFragPtr->rootPageRange = RNIL; regFragPtr->currentPageRange = RNIL; regFragPtr->noOfPages = 0; + regFragPtr->noOfPagesToGrow = 2; regFragPtr->nextStartRange = 0; }//initFragRange() @@ -393,9 +394,10 @@ Uint32 Dbtup::allocFragPages(Fragrecord* const regFragPtr, Uint32 tafpNoAllocReq void Dbtup::allocMoreFragPages(Fragrecord* const regFragPtr) { - Uint32 noAllocPages = regFragPtr->noOfPages >> 3; // 12.5% - noAllocPages += regFragPtr->noOfPages >> 4; // 6.25% + Uint32 noAllocPages 
= regFragPtr->noOfPagesToGrow >> 3; // 12.5% + noAllocPages += regFragPtr->noOfPagesToGrow >> 4; // 6.25% noAllocPages += 2; + regFragPtr->noOfPagesToGrow += noAllocPages; /* -----------------------------------------------------------------*/ // We will grow by 18.75% plus two more additional pages to grow // a little bit quicker in the beginning. diff --git a/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp b/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp index 7b642f90a17..8a55777ac05 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp @@ -40,7 +40,11 @@ Dbtup::setUpQueryRoutines(Tablerec* const regTabPtr) if ((AttributeDescriptor::getArrayType(attrDescriptor) == ZNON_ARRAY) || (AttributeDescriptor::getArrayType(attrDescriptor) == ZFIXED_ARRAY)) { if (!AttributeDescriptor::getNullable(attrDescriptor)) { - if (AttributeDescriptor::getSizeInWords(attrDescriptor) == 1) { + if (AttributeDescriptor::getSize(attrDescriptor) == 0){ + ljam(); + regTabPtr->readFunctionArray[i] = &Dbtup::readBitsNotNULL; + regTabPtr->updateFunctionArray[i] = &Dbtup::updateBitsNotNULL; + } else if (AttributeDescriptor::getSizeInWords(attrDescriptor) == 1){ ljam(); regTabPtr->readFunctionArray[i] = &Dbtup::readFixedSizeTHOneWordNotNULL; regTabPtr->updateFunctionArray[i] = &Dbtup::updateFixedSizeTHOneWordNotNULL; @@ -55,13 +59,18 @@ Dbtup::setUpQueryRoutines(Tablerec* const regTabPtr) } else { ndbrequire(false); }//if - // replace read function of char attribute + // replace functions for char attribute if (AttributeOffset::getCharsetFlag(attrOffset)) { ljam(); - regTabPtr->readFunctionArray[i] = &Dbtup::readCharNotNULL; + regTabPtr->readFunctionArray[i] = &Dbtup::readFixedSizeTHManyWordNotNULL; + regTabPtr->updateFunctionArray[i] = &Dbtup::updateFixedSizeTHManyWordNotNULL; } } else { - if (AttributeDescriptor::getSizeInWords(attrDescriptor) == 1) { + if (AttributeDescriptor::getSize(attrDescriptor) == 0){ + ljam(); + 
regTabPtr->readFunctionArray[i] = &Dbtup::readBitsNULLable; + regTabPtr->updateFunctionArray[i] = &Dbtup::updateBitsNULLable; + } else if (AttributeDescriptor::getSizeInWords(attrDescriptor) == 1){ ljam(); regTabPtr->readFunctionArray[i] = &Dbtup::readFixedSizeTHOneWordNULLable; regTabPtr->updateFunctionArray[i] = &Dbtup::updateFixedSizeTHManyWordNULLable; @@ -78,10 +87,11 @@ Dbtup::setUpQueryRoutines(Tablerec* const regTabPtr) regTabPtr->readFunctionArray[i] = &Dbtup::readFixedSizeTHZeroWordNULLable; regTabPtr->updateFunctionArray[i] = &Dbtup::updateFixedSizeTHManyWordNULLable; }//if - // replace read function of char attribute + // replace functions for char attribute if (AttributeOffset::getCharsetFlag(attrOffset)) { ljam(); - regTabPtr->readFunctionArray[i] = &Dbtup::readCharNULLable; + regTabPtr->readFunctionArray[i] = &Dbtup::readFixedSizeTHManyWordNULLable; + regTabPtr->updateFunctionArray[i] = &Dbtup::updateFixedSizeTHManyWordNULLable; } }//if } else if (AttributeDescriptor::getArrayType(attrDescriptor) == ZVAR_ARRAY) { @@ -329,25 +339,68 @@ Dbtup::readFixedSizeTHManyWordNotNULL(Uint32* outBuffer, Uint32 attrDes2) { Uint32 indexBuf = tOutBufIndex; + Uint32 charsetFlag = AttributeOffset::getCharsetFlag(attrDes2); Uint32 readOffset = AttributeOffset::getOffset(attrDes2); Uint32 attrNoOfWords = AttributeDescriptor::getSizeInWords(attrDescriptor); - Uint32 newIndexBuf = indexBuf + attrNoOfWords; Uint32 maxRead = tMaxRead; ndbrequire((readOffset + attrNoOfWords - 1) < tCheckOffset); - if (newIndexBuf <= maxRead) { - ljam(); - ahOut->setDataSize(attrNoOfWords); - MEMCOPY_NO_WORDS(&outBuffer[indexBuf], - &tTupleHeader[readOffset], - attrNoOfWords); - tOutBufIndex = newIndexBuf; - return true; + if (! charsetFlag || ! 
tXfrmFlag) { + Uint32 newIndexBuf = indexBuf + attrNoOfWords; + if (newIndexBuf <= maxRead) { + ljam(); + ahOut->setDataSize(attrNoOfWords); + MEMCOPY_NO_WORDS(&outBuffer[indexBuf], + &tTupleHeader[readOffset], + attrNoOfWords); + tOutBufIndex = newIndexBuf; + return true; + } else { + ljam(); + terrorCode = ZTRY_TO_READ_TOO_MUCH_ERROR; + }//if } else { ljam(); - terrorCode = ZTRY_TO_READ_TOO_MUCH_ERROR; - return false; - }//if + Tablerec* regTabPtr = tabptr.p; + Uint32 srcBytes = AttributeDescriptor::getSizeInBytes(attrDescriptor); + uchar* dstPtr = (uchar*)&outBuffer[indexBuf]; + const uchar* srcPtr = (uchar*)&tTupleHeader[readOffset]; + Uint32 i = AttributeOffset::getCharsetPos(attrDes2); + ndbrequire(i < regTabPtr->noOfCharsets); + CHARSET_INFO* cs = regTabPtr->charsetArray[i]; + Uint32 typeId = AttributeDescriptor::getType(attrDescriptor); + Uint32 lb, len; + bool ok = NdbSqlUtil::get_var_length(typeId, srcPtr, srcBytes, lb, len); + if (ok) { + Uint32 xmul = cs->strxfrm_multiply; + if (xmul == 0) + xmul = 1; + // see comment in DbtcMain.cpp + Uint32 dstLen = xmul * (srcBytes - lb); + Uint32 maxIndexBuf = indexBuf + (dstLen >> 2); + if (maxIndexBuf <= maxRead) { + ljam(); + int n = NdbSqlUtil::strnxfrm_bug7284(cs, dstPtr, dstLen, srcPtr + lb, len); + ndbrequire(n != -1); + while ((n & 3) != 0) { + dstPtr[n++] = 0; + } + Uint32 dstWords = (n >> 2); + ahOut->setDataSize(dstWords); + Uint32 newIndexBuf = indexBuf + dstWords; + ndbrequire(newIndexBuf <= maxRead); + tOutBufIndex = newIndexBuf; + return true; + } else { + ljam(); + terrorCode = ZTRY_TO_READ_TOO_MUCH_ERROR; + } + } else { + ljam(); + terrorCode = ZTUPLE_CORRUPTED_ERROR; + } + } + return false; }//Dbtup::readFixedSizeTHManyWordNotNULL() bool @@ -394,7 +447,6 @@ Dbtup::readFixedSizeTHManyWordNULLable(Uint32* outBuffer, Uint32 attrDescriptor, Uint32 attrDes2) { -ljam(); if (!nullFlagCheck(attrDes2)) { ljam(); return readFixedSizeTHManyWordNotNULL(outBuffer, @@ -555,74 +607,6 @@ 
Dbtup::readDynSmallVarSize(Uint32* outBuffer, return false; }//Dbtup::readDynSmallVarSize() - -bool -Dbtup::readCharNotNULL(Uint32* outBuffer, - AttributeHeader* ahOut, - Uint32 attrDescriptor, - Uint32 attrDes2) -{ - Uint32 indexBuf = tOutBufIndex; - Uint32 readOffset = AttributeOffset::getOffset(attrDes2); - Uint32 attrNoOfWords = AttributeDescriptor::getSizeInWords(attrDescriptor); - Uint32 newIndexBuf = indexBuf + attrNoOfWords; - Uint32 maxRead = tMaxRead; - - ndbrequire((readOffset + attrNoOfWords - 1) < tCheckOffset); - if (newIndexBuf <= maxRead) { - ljam(); - ahOut->setDataSize(attrNoOfWords); - if (! tXfrmFlag) { - MEMCOPY_NO_WORDS(&outBuffer[indexBuf], - &tTupleHeader[readOffset], - attrNoOfWords); - } else { - ljam(); - Tablerec* regTabPtr = tabptr.p; - Uint32 i = AttributeOffset::getCharsetPos(attrDes2); - ndbrequire(i < tabptr.p->noOfCharsets); - // not const in MySQL - CHARSET_INFO* cs = tabptr.p->charsetArray[i]; - // XXX should strip Uint32 null padding - const unsigned nBytes = attrNoOfWords << 2; - unsigned n = - (*cs->coll->strnxfrm)(cs, - (uchar*)&outBuffer[indexBuf], - nBytes, - (const uchar*)&tTupleHeader[readOffset], - nBytes); - // pad with ascii spaces - while (n < nBytes) - ((uchar*)&outBuffer[indexBuf])[n++] = 0x20; - } - tOutBufIndex = newIndexBuf; - return true; - } else { - ljam(); - terrorCode = ZTRY_TO_READ_TOO_MUCH_ERROR; - return false; - } -} - -bool -Dbtup::readCharNULLable(Uint32* outBuffer, - AttributeHeader* ahOut, - Uint32 attrDescriptor, - Uint32 attrDes2) -{ - if (!nullFlagCheck(attrDes2)) { - ljam(); - return readCharNotNULL(outBuffer, - ahOut, - attrDescriptor, - attrDes2); - } else { - ljam(); - ahOut->setNULL(); - return true; - } -} - /* ---------------------------------------------------------------------- */ /* THIS ROUTINE IS USED TO UPDATE A NUMBER OF ATTRIBUTES. 
IT IS */ /* USED BY THE INSERT ROUTINE, THE UPDATE ROUTINE AND IT CAN BE */ @@ -701,22 +685,16 @@ Dbtup::checkUpdateOfPrimaryKey(Uint32* updateBuffer, Tablerec* const regTabPtr) Uint32 attrDescriptor = tableDescriptor[attrDescriptorIndex].tabDescr; Uint32 attributeOffset = tableDescriptor[attrDescriptorIndex + 1].tabDescr; - Uint32 xfrmBuffer[1 + MAX_KEY_SIZE_IN_WORDS * 1]; // strxfrm_multiply == 1 + Uint32 xfrmBuffer[1 + MAX_KEY_SIZE_IN_WORDS * MAX_XFRM_MULTIPLY]; Uint32 charsetFlag = AttributeOffset::getCharsetFlag(attributeOffset); if (charsetFlag) { - Uint32 csPos = AttributeOffset::getCharsetPos(attributeOffset); - CHARSET_INFO* cs = regTabPtr->charsetArray[csPos]; - Uint32 sizeInBytes = AttributeDescriptor::getSizeInBytes(attrDescriptor); - Uint32 sizeInWords = AttributeDescriptor::getSizeInWords(attrDescriptor); - const uchar* srcPtr = (uchar*)&updateBuffer[1]; - uchar* dstPtr = (uchar*)&xfrmBuffer[1]; - Uint32 n = - (*cs->coll->strnxfrm)(cs, dstPtr, sizeInBytes, srcPtr, sizeInBytes); - // pad with blanks (unlikely) and zeroes to match NDB API behaviour - while (n < sizeInBytes) - dstPtr[n++] = 0x20; - while (n < 4 * sizeInWords) - dstPtr[n++] = 0; + Uint32 csIndex = AttributeOffset::getCharsetPos(attributeOffset); + CHARSET_INFO* cs = regTabPtr->charsetArray[csIndex]; + Uint32 srcPos = 0; + Uint32 dstPos = 0; + xfrm_attr(attrDescriptor, cs, &updateBuffer[1], srcPos, + &xfrmBuffer[1], dstPos, MAX_KEY_SIZE_IN_WORDS * MAX_XFRM_MULTIPLY); + ahIn.setDataSize(dstPos); xfrmBuffer[0] = ahIn.m_value; updateBuffer = xfrmBuffer; } @@ -831,6 +809,7 @@ Dbtup::updateFixedSizeTHManyWordNotNULL(Uint32* inBuffer, Uint32 indexBuf = tInBufIndex; Uint32 inBufLen = tInBufLen; Uint32 updateOffset = AttributeOffset::getOffset(attrDes2); + Uint32 charsetFlag = AttributeOffset::getCharsetFlag(attrDes2); AttributeHeader ahIn(inBuffer[indexBuf]); Uint32 nullIndicator = ahIn.isNULL(); Uint32 noOfWords = AttributeDescriptor::getSizeInWords(attrDescriptor); @@ -840,6 +819,31 @@ 
Dbtup::updateFixedSizeTHManyWordNotNULL(Uint32* inBuffer, if (newIndex <= inBufLen) { if (!nullIndicator) { ljam(); + if (charsetFlag) { + ljam(); + Tablerec* regTabPtr = tabptr.p; + Uint32 typeId = AttributeDescriptor::getType(attrDescriptor); + Uint32 bytes = AttributeDescriptor::getSizeInBytes(attrDescriptor); + Uint32 i = AttributeOffset::getCharsetPos(attrDes2); + ndbrequire(i < regTabPtr->noOfCharsets); + // not const in MySQL + CHARSET_INFO* cs = regTabPtr->charsetArray[i]; + int not_used; + const char* ssrc = (const char*)&inBuffer[tInBufIndex + 1]; + Uint32 lb, len; + if (! NdbSqlUtil::get_var_length(typeId, ssrc, bytes, lb, len)) { + ljam(); + terrorCode = ZINVALID_CHAR_FORMAT; + return false; + } + // fast fix bug#7340 + if (typeId != NDB_TYPE_TEXT && + (*cs->cset->well_formed_len)(cs, ssrc + lb, ssrc + lb + len, ZNIL, ¬_used) != len) { + ljam(); + terrorCode = ZINVALID_CHAR_FORMAT; + return false; + } + } tInBufIndex = newIndex; MEMCOPY_NO_WORDS(&tTupleHeader[updateOffset], &inBuffer[indexBuf + 1], @@ -1011,18 +1015,198 @@ Dbtup::read_psuedo(Uint32 attrId, Uint32* outBuffer){ Signal * signal = (Signal*)&tmp; switch(attrId){ case AttributeHeader::FRAGMENT: - * outBuffer = operPtr.p->fragId; + * outBuffer = operPtr.p->fragId >> 1; // remove "hash" bit + return 1; + case AttributeHeader::FRAGMENT_MEMORY: + { + Uint64 tmp = 0; + tmp += fragptr.p->noOfPages; + { + /** + * Each fragment is split into 2...get #pages from other as well + */ + Uint32 twin = fragptr.p->fragmentId ^ 1; + FragrecordPtr twinPtr; + getFragmentrec(twinPtr, twin, tabptr.p); + ndbrequire(twinPtr.p != 0); + tmp += twinPtr.p->noOfPages; + } + tmp *= 32768; + memcpy(outBuffer,&tmp,8); + } + return 2; + case AttributeHeader::ROW_SIZE: + * outBuffer = tabptr.p->tupheadsize << 2; return 1; case AttributeHeader::ROW_COUNT: case AttributeHeader::COMMIT_COUNT: signal->theData[0] = operPtr.p->userpointer; signal->theData[1] = attrId; - + EXECUTE_DIRECT(DBLQH, GSN_READ_PSUEDO_REQ, signal, 2); 
outBuffer[0] = signal->theData[0]; outBuffer[1] = signal->theData[1]; return 2; + case AttributeHeader::RANGE_NO: + signal->theData[0] = operPtr.p->userpointer; + signal->theData[1] = attrId; + + EXECUTE_DIRECT(DBLQH, GSN_READ_PSUEDO_REQ, signal, 2); + outBuffer[0] = signal->theData[0]; + return 1; default: return 0; } } + +bool +Dbtup::readBitsNotNULL(Uint32* outBuffer, + AttributeHeader* ahOut, + Uint32 attrDescriptor, + Uint32 attrDes2) +{ + Tablerec* const regTabPtr = tabptr.p; + Uint32 pos = AttributeOffset::getNullFlagPos(attrDes2); + Uint32 bitCount = AttributeDescriptor::getArraySize(attrDescriptor); + Uint32 indexBuf = tOutBufIndex; + Uint32 newIndexBuf = indexBuf + ((bitCount + 31) >> 5); + Uint32 maxRead = tMaxRead; + + if (newIndexBuf <= maxRead) { + ljam(); + ahOut->setDataSize((bitCount + 31) >> 5); + tOutBufIndex = newIndexBuf; + + BitmaskImpl::getField(regTabPtr->tupNullWords, + tTupleHeader+regTabPtr->tupNullIndex, + pos, + bitCount, + outBuffer+indexBuf); + + return true; + } else { + ljam(); + terrorCode = ZTRY_TO_READ_TOO_MUCH_ERROR; + return false; + }//if +} + +bool +Dbtup::readBitsNULLable(Uint32* outBuffer, + AttributeHeader* ahOut, + Uint32 attrDescriptor, + Uint32 attrDes2) +{ + Tablerec* const regTabPtr = tabptr.p; + Uint32 pos = AttributeOffset::getNullFlagPos(attrDes2); + Uint32 bitCount = AttributeDescriptor::getArraySize(attrDescriptor); + + Uint32 indexBuf = tOutBufIndex; + Uint32 newIndexBuf = indexBuf + ((bitCount + 31) >> 5); + Uint32 maxRead = tMaxRead; + + if(BitmaskImpl::get(regTabPtr->tupNullWords, + tTupleHeader+regTabPtr->tupNullIndex, + pos)) + { + ljam(); + ahOut->setNULL(); + return true; + } + + + if (newIndexBuf <= maxRead) { + ljam(); + ahOut->setDataSize((bitCount + 31) >> 5); + tOutBufIndex = newIndexBuf; + BitmaskImpl::getField(regTabPtr->tupNullWords, + tTupleHeader+regTabPtr->tupNullIndex, + pos+1, + bitCount, + outBuffer+indexBuf); + return true; + } else { + ljam(); + terrorCode = ZTRY_TO_READ_TOO_MUCH_ERROR; + 
return false; + }//if +} + +bool +Dbtup::updateBitsNotNULL(Uint32* inBuffer, + Uint32 attrDescriptor, + Uint32 attrDes2) +{ + Tablerec* const regTabPtr = tabptr.p; + Uint32 indexBuf = tInBufIndex; + Uint32 inBufLen = tInBufLen; + AttributeHeader ahIn(inBuffer[indexBuf]); + Uint32 nullIndicator = ahIn.isNULL(); + Uint32 pos = AttributeOffset::getNullFlagPos(attrDes2); + Uint32 bitCount = AttributeDescriptor::getArraySize(attrDescriptor); + Uint32 newIndex = indexBuf + 1 + ((bitCount + 31) >> 5); + + if (newIndex <= inBufLen) { + if (!nullIndicator) { + BitmaskImpl::setField(regTabPtr->tupNullWords, + tTupleHeader+regTabPtr->tupNullIndex, + pos, + bitCount, + inBuffer+indexBuf+1); + tInBufIndex = newIndex; + return true; + } else { + ljam(); + terrorCode = ZNOT_NULL_ATTR; + return false; + }//if + } else { + ljam(); + terrorCode = ZAI_INCONSISTENCY_ERROR; + return false; + }//if + return true; +} + +bool +Dbtup::updateBitsNULLable(Uint32* inBuffer, + Uint32 attrDescriptor, + Uint32 attrDes2) +{ + Tablerec* const regTabPtr = tabptr.p; + AttributeHeader ahIn(inBuffer[tInBufIndex]); + Uint32 indexBuf = tInBufIndex; + Uint32 nullIndicator = ahIn.isNULL(); + Uint32 pos = AttributeOffset::getNullFlagPos(attrDes2); + Uint32 bitCount = AttributeDescriptor::getArraySize(attrDescriptor); + + if (!nullIndicator) { + BitmaskImpl::clear(regTabPtr->tupNullWords, + tTupleHeader+regTabPtr->tupNullIndex, + pos); + BitmaskImpl::setField(regTabPtr->tupNullWords, + tTupleHeader+regTabPtr->tupNullIndex, + pos+1, + bitCount, + inBuffer+indexBuf+1); + + Uint32 newIndex = indexBuf + 1 + ((bitCount + 31) >> 5); + tInBufIndex = newIndex; + return true; + } else { + Uint32 newIndex = tInBufIndex + 1; + if (newIndex <= tInBufLen) { + ljam(); + BitmaskImpl::set(regTabPtr->tupNullWords, + tTupleHeader+regTabPtr->tupNullIndex, + pos); + + tInBufIndex = newIndex; + return true; + } else { + ljam(); + terrorCode = ZAI_INCONSISTENCY_ERROR; + return false; + }//if + }//if +} diff --git 
a/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp b/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp new file mode 100644 index 00000000000..396404faa8c --- /dev/null +++ b/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp @@ -0,0 +1,315 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define DBTUP_C +#include "Dbtup.hpp" +#include <signaldata/AccScan.hpp> +#include <signaldata/NextScan.hpp> + +#undef jam +#undef jamEntry +#define jam() { jamLine(32000 + __LINE__); } +#define jamEntry() { jamEntryLine(32000 + __LINE__); } + +void +Dbtup::execACC_SCANREQ(Signal* signal) +{ + jamEntry(); + const AccScanReq reqCopy = *(const AccScanReq*)signal->getDataPtr(); + const AccScanReq* const req = &reqCopy; + ScanOpPtr scanPtr; + scanPtr.i = RNIL; + do { + // find table and fragments + TablerecPtr tablePtr; + tablePtr.i = req->tableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + FragrecordPtr fragPtr[2]; + Uint32 fragId = req->fragmentNo << 1; + fragPtr[0].i = fragPtr[1].i = RNIL; + getFragmentrec(fragPtr[0], fragId | 0, tablePtr.p); + getFragmentrec(fragPtr[1], fragId | 1, tablePtr.p); + ndbrequire(fragPtr[0].i != RNIL && fragPtr[1].i != RNIL); + Fragrecord& frag = *fragPtr[0].p; + // seize from pool and link to per-fragment list + if (! 
frag.m_scanList.seize(scanPtr)) { + jam(); + break; + } + new (scanPtr.p) ScanOp(); + ScanOp& scan = *scanPtr.p; + scan.m_state = ScanOp::First; + scan.m_userPtr = req->senderData; + scan.m_userRef = req->senderRef; + scan.m_tableId = tablePtr.i; + scan.m_fragId = frag.fragmentId; + scan.m_fragPtrI[0] = fragPtr[0].i; + scan.m_fragPtrI[1] = fragPtr[1].i; + scan.m_transId1 = req->transId1; + scan.m_transId2 = req->transId2; + // conf + AccScanConf* const conf = (AccScanConf*)signal->getDataPtrSend(); + conf->scanPtr = req->senderData; + conf->accPtr = scanPtr.i; + conf->flag = AccScanConf::ZNOT_EMPTY_FRAGMENT; + sendSignal(req->senderRef, GSN_ACC_SCANCONF, signal, + AccScanConf::SignalLength, JBB); + return; + } while (0); + if (scanPtr.i != RNIL) { + jam(); + releaseScanOp(scanPtr); + } + // LQH does not handle REF + signal->theData[0] = 0x313; + sendSignal(req->senderRef, GSN_ACC_SCANREF, signal, 1, JBB); +} + +void +Dbtup::execNEXT_SCANREQ(Signal* signal) +{ + jamEntry(); + const NextScanReq reqCopy = *(const NextScanReq*)signal->getDataPtr(); + const NextScanReq* const req = &reqCopy; + ScanOpPtr scanPtr; + c_scanOpPool.getPtr(scanPtr, req->accPtr); + ScanOp& scan = *scanPtr.p; + FragrecordPtr fragPtr; + fragPtr.i = scan.m_fragPtrI[0]; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + Fragrecord& frag = *fragPtr.p; + switch (req->scanFlag) { + case NextScanReq::ZSCAN_NEXT: + jam(); + break; + case NextScanReq::ZSCAN_NEXT_COMMIT: + jam(); + break; + case NextScanReq::ZSCAN_COMMIT: + jam(); + { + NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); + conf->scanPtr = scan.m_userPtr; + unsigned signalLength = 1; + sendSignal(scanPtr.p->m_userRef, GSN_NEXT_SCANCONF, + signal, signalLength, JBB); + return; + } + break; + case NextScanReq::ZSCAN_CLOSE: + jam(); + scanClose(signal, scanPtr); + return; + case NextScanReq::ZSCAN_NEXT_ABORT: + jam(); + default: + jam(); + ndbrequire(false); + break; + } + // start looking for next scan result + 
AccCheckScan* checkReq = (AccCheckScan*)signal->getDataPtrSend(); + checkReq->accPtr = scanPtr.i; + checkReq->checkLcpStop = AccCheckScan::ZNOT_CHECK_LCP_STOP; + EXECUTE_DIRECT(DBTUP, GSN_ACC_CHECK_SCAN, signal, AccCheckScan::SignalLength); + jamEntry(); +} + +void +Dbtup::execACC_CHECK_SCAN(Signal* signal) +{ + jamEntry(); + const AccCheckScan reqCopy = *(const AccCheckScan*)signal->getDataPtr(); + const AccCheckScan* const req = &reqCopy; + ScanOpPtr scanPtr; + c_scanOpPool.getPtr(scanPtr, req->accPtr); + ScanOp& scan = *scanPtr.p; + FragrecordPtr fragPtr; + fragPtr.i = scan.m_fragPtrI[0]; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + Fragrecord& frag = *fragPtr.p; + if (req->checkLcpStop == AccCheckScan::ZCHECK_LCP_STOP) { + jam(); + signal->theData[0] = scan.m_userPtr; + signal->theData[1] = true; + EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2); + jamEntry(); + return; + } + if (scan.m_state == ScanOp::First) { + jam(); + scanFirst(signal, scanPtr); + } + if (scan.m_state == ScanOp::Next) { + jam(); + scanNext(signal, scanPtr); + } + if (scan.m_state == ScanOp::Locked) { + jam(); + const PagePos& pos = scan.m_scanPos; + NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); + conf->scanPtr = scan.m_userPtr; + conf->accOperationPtr = (Uint32)-1; // no lock returned + conf->fragId = frag.fragmentId | pos.m_fragBit; + conf->localKey[0] = (pos.m_pageId << MAX_TUPLES_BITS) | + (pos.m_tupleNo << 1); + conf->localKey[1] = 0; + conf->localKeyLength = 1; + unsigned signalLength = 6; + Uint32 blockNo = refToBlock(scan.m_userRef); + EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, signalLength); + jamEntry(); + // next time look for next entry + scan.m_state = ScanOp::Next; + return; + } + if (scan.m_state == ScanOp::Last || + scan.m_state == ScanOp::Invalid) { + jam(); + NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); + conf->scanPtr = scan.m_userPtr; + conf->accOperationPtr = RNIL; + conf->fragId = RNIL; + unsigned 
signalLength = 3; + sendSignal(scanPtr.p->m_userRef, GSN_NEXT_SCANCONF, + signal, signalLength, JBB); + return; + } + ndbrequire(false); +} + +void +Dbtup::scanFirst(Signal* signal, ScanOpPtr scanPtr) +{ + ScanOp& scan = *scanPtr.p; + // set to first fragment, first page, first tuple + PagePos& pos = scan.m_scanPos; + pos.m_fragId = scan.m_fragId; + pos.m_fragBit = 0; + pos.m_pageId = 0; + pos.m_tupleNo = 0; + // just before + pos.m_match = false; + // let scanNext() do the work + scan.m_state = ScanOp::Next; +} + +// TODO optimize this + index build +void +Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr) +{ + ScanOp& scan = *scanPtr.p; + PagePos& pos = scan.m_scanPos; + TablerecPtr tablePtr; + tablePtr.i = scan.m_tableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + while (true) { + // TODO time-slice here after X loops + jam(); + // get fragment + if (pos.m_fragBit == 2) { + jam(); + scan.m_state = ScanOp::Last; + break; + } + ndbrequire(pos.m_fragBit <= 1); + FragrecordPtr fragPtr; + fragPtr.i = scan.m_fragPtrI[pos.m_fragBit]; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + Fragrecord& frag = *fragPtr.p; + // get page + PagePtr pagePtr; + if (pos.m_pageId >= frag.noOfPages) { + jam(); + pos.m_fragBit++; + pos.m_pageId = 0; + pos.m_tupleNo = 0; + pos.m_match = false; + continue; + } + Uint32 realPageId = getRealpid(fragPtr.p, pos.m_pageId); + pagePtr.i = realPageId; + ptrCheckGuard(pagePtr, cnoOfPage, page); + const Uint32 pageState = pagePtr.p->pageWord[ZPAGE_STATE_POS]; + if (pageState != ZTH_MM_FREE && + pageState != ZTH_MM_FULL) { + jam(); + pos.m_pageId++; + pos.m_tupleNo = 0; + pos.m_match = false; + continue; + } + // get next tuple + if (pos.m_match) + pos.m_tupleNo++; + pos.m_match = true; + const Uint32 tupheadsize = tablePtr.p->tupheadsize; + Uint32 pageOffset = ZPAGE_HEADER_SIZE + pos.m_tupleNo * tupheadsize; + if (pageOffset + tupheadsize > ZWORDS_ON_PAGE) { + jam(); + pos.m_pageId++; + pos.m_tupleNo = 0; + pos.m_match = false; + 
continue; + } + // skip over free tuple + bool isFree = false; + if (pageState == ZTH_MM_FREE) { + jam(); + if ((pagePtr.p->pageWord[pageOffset] >> 16) == tupheadsize) { + Uint32 nextTuple = pagePtr.p->pageWord[ZFREELIST_HEADER_POS] >> 16; + while (nextTuple != 0) { + jam(); + if (nextTuple == pageOffset) { + jam(); + isFree = true; + break; + } + nextTuple = pagePtr.p->pageWord[nextTuple] & 0xffff; + } + } + } + if (isFree) { + jam(); + continue; + } + // TODO check for operation and return latest in own tx + scan.m_state = ScanOp::Locked; + break; + } +} + +void +Dbtup::scanClose(Signal* signal, ScanOpPtr scanPtr) +{ + NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); + conf->scanPtr = scanPtr.p->m_userPtr; + conf->accOperationPtr = RNIL; + conf->fragId = RNIL; + unsigned signalLength = 3; + sendSignal(scanPtr.p->m_userRef, GSN_NEXT_SCANCONF, + signal, signalLength, JBB); + releaseScanOp(scanPtr); +} + +void +Dbtup::releaseScanOp(ScanOpPtr& scanPtr) +{ + FragrecordPtr fragPtr; + fragPtr.i = scanPtr.p->m_fragPtrI[0]; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + fragPtr.p->m_scanList.release(scanPtr); +} diff --git a/ndb/src/kernel/blocks/dbtup/DbtupSystemRestart.cpp b/ndb/src/kernel/blocks/dbtup/DbtupSystemRestart.cpp index ed835dc057a..35d1b75e573 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupSystemRestart.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupSystemRestart.cpp @@ -187,7 +187,7 @@ Dbtup::rfrInitRestartInfoLab(Signal* signal, DiskBufferSegmentInfoPtr dbsiPtr) const Uint32 pageCount = riPtr.p->sriNumDataPages - regFragPtr.p->noOfPages; if(pageCount > 0){ Uint32 noAllocPages = allocFragPages(regFragPtr.p, pageCount); - ndbrequire(noAllocPages == pageCount); + ndbrequireErr(noAllocPages == pageCount, NDBD_EXIT_SR_OUT_OF_DATAMEMORY); }//if ndbrequire(getNoOfPages(regFragPtr.p) == riPtr.p->sriNumDataPages); @@ -418,7 +418,7 @@ void Dbtup::xlcRestartCompletedLab(Signal* signal) { cnoOfLocalLogInfo = 0; - signal->theData[0] = 
EventReport::UNDORecordsExecuted; + signal->theData[0] = NDB_LE_UNDORecordsExecuted; signal->theData[1] = DBTUP; // From block signal->theData[2] = 0; // Total records executed for (int i = 0; i < 10; i++) { diff --git a/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp b/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp index 59a31475617..6652464dc0f 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp @@ -607,7 +607,7 @@ void Dbtup::executeTrigger(Signal* signal, for everybody else. */ signal->theData[0] = trigPtr->triggerId; - signal->theData[1] = regOperPtr->fragId; + signal->theData[1] = regOperPtr->fragId >> 1; // send "real" frag id EXECUTE_DIRECT(BACKUP, GSN_BACKUP_TRIG_REQ, signal, 2); ljamEntry(); if (signal->theData[0] == 0) { @@ -622,8 +622,7 @@ void Dbtup::executeTrigger(Signal* signal, mainBuffer, noMainWords, copyBuffer, - noCopyWords, - (ref == BACKUP ? false : true))) { + noCopyWords)) { ljam(); return; }//if @@ -728,8 +727,7 @@ bool Dbtup::readTriggerInfo(TupTriggerData* const trigPtr, Uint32* const mainBuffer, Uint32& noMainWords, Uint32* const copyBuffer, - Uint32& noCopyWords, - bool xfrm) + Uint32& noCopyWords) { noCopyWords = 0; noMainWords = 0; @@ -759,7 +757,7 @@ bool Dbtup::readTriggerInfo(TupTriggerData* const trigPtr, regTabPtr->noOfKeyAttr, keyBuffer, ZATTR_BUFFER_SIZE, - xfrm); + false); ndbrequire(ret != -1); noPrimKey= ret; @@ -802,7 +800,7 @@ bool Dbtup::readTriggerInfo(TupTriggerData* const trigPtr, numAttrsToRead, mainBuffer, ZATTR_BUFFER_SIZE, - xfrm); + false); ndbrequire(ret != -1); noMainWords= ret; } else { @@ -828,7 +826,7 @@ bool Dbtup::readTriggerInfo(TupTriggerData* const trigPtr, numAttrsToRead, copyBuffer, ZATTR_BUFFER_SIZE, - xfrm); + false); ndbrequire(ret != -1); noCopyWords = ret; diff --git a/ndb/src/kernel/blocks/dbtup/Makefile.am b/ndb/src/kernel/blocks/dbtup/Makefile.am index e51410e6be3..2d14ad41025 100644 --- a/ndb/src/kernel/blocks/dbtup/Makefile.am +++ 
b/ndb/src/kernel/blocks/dbtup/Makefile.am @@ -18,6 +18,7 @@ libdbtup_a_SOURCES = \ DbtupGen.cpp \ DbtupSystemRestart.cpp \ DbtupIndex.cpp \ + DbtupScan.cpp \ DbtupDebug.cpp include $(top_srcdir)/ndb/config/common.mk.am diff --git a/ndb/src/kernel/blocks/dbtux/Dbtux.hpp b/ndb/src/kernel/blocks/dbtux/Dbtux.hpp index 3d78fccb780..d4a44b9e641 100644 --- a/ndb/src/kernel/blocks/dbtux/Dbtux.hpp +++ b/ndb/src/kernel/blocks/dbtux/Dbtux.hpp @@ -404,7 +404,7 @@ private: Uint32 m_accLockOp; Uint8 m_readCommitted; // no locking Uint8 m_lockMode; - Uint8 m_keyInfo; + Uint8 m_descending; ScanBound m_boundMin; ScanBound m_boundMax; ScanBound* m_bound[2]; // pointers to above 2 @@ -447,7 +447,7 @@ private: State m_state; DictTabInfo::TableType m_tableType; Uint32 m_tableId; - Uint16 m_fragOff; // offset for duplicate fragId bits + Uint16 unused; Uint16 m_numFrags; Uint32 m_fragId[MaxIndexFragments]; Uint32 m_fragPtrI[MaxIndexFragments]; @@ -473,7 +473,7 @@ private: struct Frag { Uint32 m_tableId; // copy from index level Uint32 m_indexId; - Uint16 m_fragOff; + Uint16 unused; Uint16 m_fragId; Uint32 m_descPage; // copy from index level Uint16 m_descOff; @@ -637,7 +637,7 @@ private: void execACCKEYREF(Signal* signal); void execACC_ABORTCONF(Signal* signal); void scanFirst(ScanOpPtr scanPtr); - void scanNext(ScanOpPtr scanPtr); + void scanNext(ScanOpPtr scanPtr, bool fromMaintReq); bool scanVisible(ScanOpPtr scanPtr, TreeEnt ent); void scanClose(Signal* signal, ScanOpPtr scanPtr); void addAccLockOp(ScanOp& scan, Uint32 accLockOp); @@ -649,7 +649,9 @@ private: */ void searchToAdd(Frag& frag, ConstData searchKey, TreeEnt searchEnt, TreePos& treePos); void searchToRemove(Frag& frag, ConstData searchKey, TreeEnt searchEnt, TreePos& treePos); - void searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePos& treePos); + void searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, bool descending, TreePos& treePos); + void searchToScanAscending(Frag& frag, 
ConstData boundInfo, unsigned boundCount, TreePos& treePos); + void searchToScanDescending(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePos& treePos); /* * DbtuxCmp.cpp @@ -1028,7 +1030,7 @@ Dbtux::ScanOp::ScanOp(ScanBoundPool& scanBoundPool) : m_accLockOp(RNIL), m_readCommitted(0), m_lockMode(0), - m_keyInfo(0), + m_descending(0), m_boundMin(scanBoundPool), m_boundMax(scanBoundPool), m_scanPos(), @@ -1072,7 +1074,6 @@ inline Dbtux::Frag::Frag(ArrayPool<ScanOp>& scanOpPool) : m_tableId(RNIL), m_indexId(RNIL), - m_fragOff(ZNIL), m_fragId(ZNIL), m_descPage(RNIL), m_descOff(0), diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp index ddab77b97b5..cf815b14c1a 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp @@ -18,24 +18,26 @@ #include "Dbtux.hpp" /* - * Search key vs node prefix or entry + * Search key vs node prefix or entry. * * The comparison starts at given attribute position. The position is * updated by number of equal initial attributes found. The entry data * may be partial in which case CmpUnknown may be returned. + * + * The attributes are normalized and have variable size given in words. */ int Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, ConstData searchKey, ConstData entryData, unsigned maxlen) { const unsigned numAttrs = frag.m_numAttrs; const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); - // number of words of attribute data left - unsigned len2 = maxlen; // skip to right position in search key only for (unsigned i = 0; i < start; i++) { jam(); searchKey += AttributeHeaderSize + searchKey.ah().getDataSize(); } + // number of words of entry data left + unsigned len2 = maxlen; int ret = 0; while (start < numAttrs) { if (len2 <= AttributeHeaderSize) { @@ -47,18 +49,20 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, ConstData searchKey, Cons if (! searchKey.ah().isNULL()) { if (! 
entryData.ah().isNULL()) { jam(); - // current attribute + // verify attribute id const DescAttr& descAttr = descEnt.m_descAttr[start]; - // full data size - const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc); - ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize()); - const unsigned size2 = min(size1, len2); + ndbrequire(searchKey.ah().getAttributeId() == descAttr.m_primaryAttrId); + ndbrequire(entryData.ah().getAttributeId() == descAttr.m_primaryAttrId); + // sizes + const unsigned size1 = searchKey.ah().getDataSize(); + const unsigned size2 = min(entryData.ah().getDataSize(), len2); len2 -= size2; // compare NdbSqlUtil::Cmp* const cmp = c_sqlCmp[start]; const Uint32* const p1 = &searchKey[AttributeHeaderSize]; const Uint32* const p2 = &entryData[AttributeHeaderSize]; - ret = (*cmp)(0, p1, p2, size1, size2); + const bool full = (maxlen == MaxAttrDataSize); + ret = (*cmp)(0, p1, size1 << 2, p2, size2 << 2, full); if (ret != 0) { jam(); break; @@ -99,18 +103,20 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, ConstData searchKey, Cons * * Following example illustrates this. We are at (a=2, b=3). * - * dir bounds strict return + * idir bounds strict return * 0 a >= 2 and b >= 3 no -1 * 0 a >= 2 and b > 3 yes +1 * 1 a <= 2 and b <= 3 no +1 * 1 a <= 2 and b < 3 yes -1 + * + * The attributes are normalized and have variable size given in words. 
*/ int -Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigned boundCount, ConstData entryData, unsigned maxlen) +Dbtux::cmpScanBound(const Frag& frag, unsigned idir, ConstData boundInfo, unsigned boundCount, ConstData entryData, unsigned maxlen) { const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); // direction 0-lower 1-upper - ndbrequire(dir <= 1); + ndbrequire(idir <= 1); // number of words of data left unsigned len2 = maxlen; // in case of no bounds, init last type to something non-strict @@ -127,21 +133,21 @@ Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigne if (! boundInfo.ah().isNULL()) { if (! entryData.ah().isNULL()) { jam(); - // current attribute - const unsigned index = boundInfo.ah().getAttributeId(); + // verify attribute id + const Uint32 index = boundInfo.ah().getAttributeId(); ndbrequire(index < frag.m_numAttrs); const DescAttr& descAttr = descEnt.m_descAttr[index]; ndbrequire(entryData.ah().getAttributeId() == descAttr.m_primaryAttrId); - // full data size + // sizes const unsigned size1 = boundInfo.ah().getDataSize(); - ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize()); - const unsigned size2 = min(size1, len2); + const unsigned size2 = min(entryData.ah().getDataSize(), len2); len2 -= size2; // compare NdbSqlUtil::Cmp* const cmp = c_sqlCmp[index]; const Uint32* const p1 = &boundInfo[AttributeHeaderSize]; const Uint32* const p2 = &entryData[AttributeHeaderSize]; - int ret = (*cmp)(0, p1, p2, size1, size2); + const bool full = (maxlen == MaxAttrDataSize); + int ret = (*cmp)(0, p1, size1 << 2, p2, size2 << 2, full); if (ret != 0) { jam(); return ret; @@ -165,5 +171,5 @@ Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigne } // all attributes were equal const int strict = (type & 0x1); - return (dir == 0 ? (strict == 0 ? -1 : +1) : (strict == 0 ? +1 : -1)); + return (idir == 0 ? (strict == 0 ? -1 : +1) : (strict == 0 ? 
+1 : -1)); } diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp index 1e1b0d1d5b6..ed29dc57915 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp @@ -340,14 +340,14 @@ operator<<(NdbOut& out, const Dbtux::ScanOp& scan) out << " [savePointId " << dec << scan.m_savePointId << "]"; out << " [accLockOp " << hex << scan.m_accLockOp << "]"; out << " [accLockOps"; - for (unsigned i = 0; i < Dbtux::MaxAccLockOps; i++) { + for (unsigned i = 0; i < scan.m_maxAccLockOps; i++) { if (scan.m_accLockOps[i] != RNIL) out << " " << hex << scan.m_accLockOps[i]; } out << "]"; out << " [readCommitted " << dec << scan.m_readCommitted << "]"; out << " [lockMode " << dec << scan.m_lockMode << "]"; - out << " [keyInfo " << dec << scan.m_keyInfo << "]"; + out << " [descending " << dec << scan.m_descending << "]"; out << " [pos " << scan.m_scanPos << "]"; out << " [ent " << scan.m_scanEnt << "]"; for (unsigned i = 0; i <= 1; i++) { @@ -370,7 +370,6 @@ operator<<(NdbOut& out, const Dbtux::Index& index) { out << "[Index " << hex << &index; out << " [tableId " << dec << index.m_tableId << "]"; - out << " [fragOff " << dec << index.m_fragOff << "]"; out << " [numFrags " << dec << index.m_numFrags << "]"; for (unsigned i = 0; i < index.m_numFrags; i++) { out << " [frag " << dec << i << " "; @@ -393,7 +392,6 @@ operator<<(NdbOut& out, const Dbtux::Frag& frag) out << "[Frag " << hex << &frag; out << " [tableId " << dec << frag.m_tableId << "]"; out << " [indexId " << dec << frag.m_indexId << "]"; - out << " [fragOff " << dec << frag.m_fragOff << "]"; out << " [fragId " << dec << frag.m_fragId << "]"; out << " [descPage " << hex << frag.m_descPage << "]"; out << " [descOff " << dec << frag.m_descOff << "]"; diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp index 8990d6c86b6..5640fdf2899 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp +++ 
b/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp @@ -217,6 +217,7 @@ Dbtux::setKeyAttrs(const Frag& frag) const unsigned numAttrs = frag.m_numAttrs; const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); for (unsigned i = 0; i < numAttrs; i++) { + jam(); const DescAttr& descAttr = descEnt.m_descAttr[i]; Uint32 size = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc); // set attr id and fixed size @@ -244,6 +245,26 @@ Dbtux::readKeyAttrs(const Frag& frag, TreeEnt ent, unsigned start, Data keyData) jamEntry(); // TODO handle error ndbrequire(ret > 0); +#ifdef VM_TRACE + if (debugFlags & (DebugMaint | DebugScan)) { + debugOut << "readKeyAttrs:" << endl; + ConstData data = keyData; + Uint32 totalSize = 0; + for (Uint32 i = start; i < frag.m_numAttrs; i++) { + Uint32 attrId = data.ah().getAttributeId(); + Uint32 dataSize = data.ah().getDataSize(); + debugOut << i << " attrId=" << attrId << " size=" << dataSize; + data += 1; + for (Uint32 j = 0; j < dataSize; j++) { + debugOut << " " << hex << data[0]; + data += 1; + } + debugOut << endl; + totalSize += 1 + dataSize; + } + ndbassert((int)totalSize == ret); + } +#endif } void @@ -251,7 +272,7 @@ Dbtux::readTablePk(const Frag& frag, TreeEnt ent, Data pkData, unsigned& pkSize) { const Uint32 tableFragPtrI = frag.m_tupTableFragPtrI[ent.m_fragBit]; const TupLoc tupLoc = ent.m_tupLoc; - int ret = c_tup->tuxReadPk(tableFragPtrI, tupLoc.getPageId(), tupLoc.getPageOffset(), pkData); + int ret = c_tup->tuxReadPk(tableFragPtrI, tupLoc.getPageId(), tupLoc.getPageOffset(), pkData, true); jamEntry(); // TODO handle error ndbrequire(ret > 0); diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp index 30afb51e7d7..4b568badc67 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp @@ -57,9 +57,8 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal) c_indexPool.getPtr(indexPtr, req->indexId); ndbrequire(indexPtr.p->m_tableId == 
req->tableId); // get base fragment id and extra bits - const Uint32 fragOff = indexPtr.p->m_fragOff; - const Uint32 fragId = req->fragId & ((1 << fragOff) - 1); - const Uint32 fragBit = req->fragId >> fragOff; + const Uint32 fragId = req->fragId & ~1; + const Uint32 fragBit = req->fragId & 1; // get the fragment FragPtr fragPtr; fragPtr.i = RNIL; diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp index b7526593a08..c85c8384081 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp @@ -16,6 +16,7 @@ #define DBTUX_META_CPP #include "Dbtux.hpp" +#include <my_sys.h> /* * Create index. @@ -84,7 +85,6 @@ Dbtux::execTUXFRAGREQ(Signal* signal) new (fragPtr.p) Frag(c_scanOpPool); fragPtr.p->m_tableId = req->primaryTableId; fragPtr.p->m_indexId = req->tableId; - fragPtr.p->m_fragOff = req->fragOff; fragPtr.p->m_fragId = req->fragId; fragPtr.p->m_numAttrs = req->noOfAttr; fragPtr.p->m_storeNullKey = true; // not yet configurable @@ -112,7 +112,6 @@ Dbtux::execTUXFRAGREQ(Signal* signal) indexPtr.p->m_state = Index::Defining; indexPtr.p->m_tableType = (DictTabInfo::TableType)req->tableType; indexPtr.p->m_tableId = req->primaryTableId; - indexPtr.p->m_fragOff = req->fragOff; indexPtr.p->m_numAttrs = req->noOfAttr; indexPtr.p->m_storeNullKey = true; // not yet configurable // allocate attribute descriptors @@ -128,7 +127,6 @@ Dbtux::execTUXFRAGREQ(Signal* signal) indexPtr.p->m_state == Index::Defining && indexPtr.p->m_tableType == (DictTabInfo::TableType)req->tableType && indexPtr.p->m_tableId == req->primaryTableId && - indexPtr.p->m_fragOff == req->fragOff && indexPtr.p->m_numAttrs == req->noOfAttr); } // copy metadata address to each fragment @@ -203,7 +201,7 @@ Dbtux::execTUX_ADD_ATTRREQ(Signal* signal) DescAttr& descAttr = descEnt.m_descAttr[attrId]; descAttr.m_attrDesc = req->attrDescriptor; descAttr.m_primaryAttrId = req->primaryAttrId; - descAttr.m_typeId = req->extTypeInfo & 
0xFF; + descAttr.m_typeId = AttributeDescriptor::getType(req->attrDescriptor); descAttr.m_charset = (req->extTypeInfo >> 16); #ifdef VM_TRACE if (debugFlags & DebugMeta) { @@ -218,17 +216,16 @@ Dbtux::execTUX_ADD_ATTRREQ(Signal* signal) errorCode = TuxAddAttrRef::InvalidAttributeType; break; } -#ifdef dbtux_uses_charset if (descAttr.m_charset != 0) { - CHARSET_INFO *cs = get_charset(descAttr.m_charset, MYF(0)); - // here use the non-binary type - if (! NdbSqlUtil::usable_in_ordered_index(descAttr.m_typeId, cs)) { + uint err; + CHARSET_INFO *cs = all_charsets[descAttr.m_charset]; + ndbrequire(cs != 0); + if ((err = NdbSqlUtil::check_column_for_ordered_index(descAttr.m_typeId, cs))) { jam(); - errorCode = TuxAddAttrRef::InvalidCharset; + errorCode = (TuxAddAttrRef::ErrorCode) err; break; } } -#endif const bool lastAttr = (indexPtr.p->m_numAttrs == fragOpPtr.p->m_numAttrsRecvd); if (ERROR_INSERTED(12003) && fragOpPtr.p->m_fragNo == 0 && attrId == 0 || ERROR_INSERTED(12004) && fragOpPtr.p->m_fragNo == 0 && lastAttr || diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp index 9f9d4cb68e3..68a3e78ce9e 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp @@ -502,7 +502,7 @@ Dbtux::moveScanList(NodeHandle& node, unsigned pos) debugOut << "At pos=" << pos << " " << node << endl; } #endif - scanNext(scanPtr); + scanNext(scanPtr, true); ndbrequire(! 
(scanPos.m_loc == node.m_loc && scanPos.m_pos == pos)); } scanPtr.i = nextPtrI; diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp index 8677ae741b3..a61b7c1f5ca 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp @@ -16,6 +16,7 @@ #define DBTUX_SCAN_CPP #include "Dbtux.hpp" +#include <my_sys.h> void Dbtux::execACC_SCANREQ(Signal* signal) @@ -34,7 +35,7 @@ Dbtux::execACC_SCANREQ(Signal* signal) fragPtr.i = RNIL; for (unsigned i = 0; i < indexPtr.p->m_numFrags; i++) { jam(); - if (indexPtr.p->m_fragId[i] == req->fragmentNo) { + if (indexPtr.p->m_fragId[i] == req->fragmentNo << 1) { jam(); c_fragPool.getPtr(fragPtr, indexPtr.p->m_fragPtrI[i]); break; @@ -43,7 +44,6 @@ Dbtux::execACC_SCANREQ(Signal* signal) ndbrequire(fragPtr.i != RNIL); Frag& frag = *fragPtr.p; // must be normal DIH/TC fragment - ndbrequire(frag.m_fragId < (1 << frag.m_fragOff)); TreeHead& tree = frag.m_tree; // check for empty fragment if (tree.m_root == NullTupLoc) { @@ -74,18 +74,18 @@ Dbtux::execACC_SCANREQ(Signal* signal) scanPtr.p->m_savePointId = req->savePointId; scanPtr.p->m_readCommitted = AccScanReq::getReadCommittedFlag(req->requestInfo); scanPtr.p->m_lockMode = AccScanReq::getLockMode(req->requestInfo); - scanPtr.p->m_keyInfo = AccScanReq::getKeyinfoFlag(req->requestInfo); -#ifdef VM_TRACE - if (debugFlags & DebugScan) { - debugOut << "Seize scan " << scanPtr.i << " " << *scanPtr.p << endl; - } -#endif + scanPtr.p->m_descending = AccScanReq::getDescendingFlag(req->requestInfo); /* * readCommitted lockMode keyInfo * 1 0 0 - read committed (no lock) * 0 0 0 - read latest (read lock) * 0 1 1 - read exclusive (write lock) */ +#ifdef VM_TRACE + if (debugFlags & DebugScan) { + debugOut << "Seize scan " << scanPtr.i << " " << *scanPtr.p << endl; + } +#endif // conf AccScanConf* const conf = (AccScanConf*)signal->getDataPtrSend(); conf->scanPtr = req->senderData; @@ -114,50 +114,100 @@ 
Dbtux::execACC_SCANREQ(Signal* signal) * keys and that all but possibly last bound is non-strict. * * Finally save the sets of lower and upper bounds (i.e. start key and - * end key). Full bound type (< 4) is included but only the strict bit - * is used since lower and upper have now been separated. + * end key). Full bound type is included but only the strict bit is + * used since lower and upper have now been separated. */ void Dbtux::execTUX_BOUND_INFO(Signal* signal) { jamEntry(); - struct BoundInfo { - int type; - unsigned offset; - unsigned size; - }; - TuxBoundInfo* const sig = (TuxBoundInfo*)signal->getDataPtrSend(); - const TuxBoundInfo reqCopy = *(const TuxBoundInfo*)sig; - const TuxBoundInfo* const req = &reqCopy; // get records + TuxBoundInfo* const sig = (TuxBoundInfo*)signal->getDataPtrSend(); + const TuxBoundInfo* const req = (const TuxBoundInfo*)sig; ScanOp& scan = *c_scanOpPool.getPtr(req->tuxScanPtrI); - Index& index = *c_indexPool.getPtr(scan.m_indexId); - // collect lower and upper bounds + const Index& index = *c_indexPool.getPtr(scan.m_indexId); + const DescEnt& descEnt = getDescEnt(index.m_descPage, index.m_descOff); + // collect normalized lower and upper bounds + struct BoundInfo { + int type2; // with EQ -> LE/GE + Uint32 offset; // offset in xfrmData + Uint32 size; + }; BoundInfo boundInfo[2][MaxIndexAttributes]; + const unsigned dstSize = 1024 * MAX_XFRM_MULTIPLY; + Uint32 xfrmData[dstSize]; + Uint32 dstPos = 0; // largest attrId seen plus one Uint32 maxAttrId[2] = { 0, 0 }; - unsigned offset = 0; - const Uint32* const data = (Uint32*)sig + TuxBoundInfo::SignalLength; // walk through entries + const Uint32* const data = (Uint32*)sig + TuxBoundInfo::SignalLength; + Uint32 offset = 0; while (offset + 2 <= req->boundAiLength) { jam(); const unsigned type = data[offset]; - if (type > 4) { - jam(); - scan.m_state = ScanOp::Invalid; - sig->errorCode = TuxBoundInfo::InvalidAttrInfo; - return; - } const AttributeHeader* ah = (const 
AttributeHeader*)&data[offset + 1]; const Uint32 attrId = ah->getAttributeId(); const Uint32 dataSize = ah->getDataSize(); - if (attrId >= index.m_numAttrs) { + if (type > 4 || attrId >= index.m_numAttrs || dstPos + 2 + dataSize > dstSize) { jam(); scan.m_state = ScanOp::Invalid; sig->errorCode = TuxBoundInfo::InvalidAttrInfo; return; } + // copy header + xfrmData[dstPos + 0] = data[offset + 0]; + xfrmData[dstPos + 1] = data[offset + 1]; + // copy bound value + Uint32 dstWords = 0; + if (! ah->isNULL()) { + jam(); + const DescAttr& descAttr = descEnt.m_descAttr[attrId]; + Uint32 srcBytes = AttributeDescriptor::getSizeInBytes(descAttr.m_attrDesc); + Uint32 srcWords = (srcBytes + 3) / 4; + if (srcWords != dataSize) { + jam(); + scan.m_state = ScanOp::Invalid; + sig->errorCode = TuxBoundInfo::InvalidAttrInfo; + return; + } + uchar* dstPtr = (uchar*)&xfrmData[dstPos + 2]; + const uchar* srcPtr = (const uchar*)&data[offset + 2]; + if (descAttr.m_charset == 0) { + memcpy(dstPtr, srcPtr, srcWords << 2); + dstWords = srcWords; + } else { + jam(); + Uint32 typeId = descAttr.m_typeId; + Uint32 lb, len; + bool ok = NdbSqlUtil::get_var_length(typeId, srcPtr, srcBytes, lb, len); + if (! 
ok) { + jam(); + scan.m_state = ScanOp::Invalid; + sig->errorCode = TuxBoundInfo::InvalidCharFormat; + return; + } + CHARSET_INFO* cs = all_charsets[descAttr.m_charset]; + Uint32 xmul = cs->strxfrm_multiply; + if (xmul == 0) + xmul = 1; + // see comment in DbtcMain.cpp + Uint32 dstLen = xmul * (srcBytes - lb); + if (dstLen > ((dstSize - dstPos) << 2)) { + jam(); + scan.m_state = ScanOp::Invalid; + sig->errorCode = TuxBoundInfo::TooMuchAttrInfo; + return; + } + int n = NdbSqlUtil::strnxfrm_bug7284(cs, dstPtr, dstLen, srcPtr + lb, len); + ndbrequire(n != -1); + while ((n & 3) != 0) { + dstPtr[n++] = 0; + } + dstWords = n / 4; + } + } for (unsigned j = 0; j <= 1; j++) { + jam(); // check if lower/upper bit matches const unsigned luBit = (j << 1); if ((type & 0x2) != luBit && type != 4) @@ -166,29 +216,35 @@ Dbtux::execTUX_BOUND_INFO(Signal* signal) const unsigned type2 = (type & 0x1) | luBit; // fill in any gap while (maxAttrId[j] <= attrId) { + jam(); BoundInfo& b = boundInfo[j][maxAttrId[j]++]; - b.type = -1; + b.type2 = -1; } BoundInfo& b = boundInfo[j][attrId]; - if (b.type != -1) { - // compare with previous bound - if (b.type != (int)type2 || - b.size != 2 + dataSize || - memcmp(&data[b.offset + 2], &data[offset + 2], dataSize << 2) != 0) { + if (b.type2 != -1) { + // compare with previously defined bound + if (b.type2 != (int)type2 || + b.size != 2 + dstWords || + memcmp(&xfrmData[b.offset + 2], &xfrmData[dstPos + 2], dstWords << 2) != 0) { jam(); scan.m_state = ScanOp::Invalid; sig->errorCode = TuxBoundInfo::InvalidBounds; return; } } else { + // fix length + AttributeHeader* ah = (AttributeHeader*)&xfrmData[dstPos + 1]; + ah->setDataSize(dstWords); // enter new bound - b.type = type2; - b.offset = offset; - b.size = 2 + dataSize; + jam(); + b.type2 = type2; + b.offset = dstPos; + b.size = 2 + dstWords; } } // jump to next offset += 2 + dataSize; + dstPos += 2 + dstWords; } if (offset != req->boundAiLength) { jam(); @@ -202,13 +258,13 @@ 
Dbtux::execTUX_BOUND_INFO(Signal* signal) jam(); const BoundInfo& b = boundInfo[j][i]; // check for gap or strict bound before last - if (b.type == -1 || (i + 1 < maxAttrId[j] && (b.type & 0x1))) { + if (b.type2 == -1 || (i + 1 < maxAttrId[j] && (b.type2 & 0x1))) { jam(); scan.m_state = ScanOp::Invalid; sig->errorCode = TuxBoundInfo::InvalidBounds; return; } - bool ok = scan.m_bound[j]->append(&data[b.offset], b.size); + bool ok = scan.m_bound[j]->append(&xfrmData[b.offset], b.size); if (! ok) { jam(); scan.m_state = ScanOp::Invalid; @@ -354,7 +410,7 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); conf->scanPtr = scan.m_userPtr; conf->accOperationPtr = RNIL; // no tuple returned - conf->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff); + conf->fragId = frag.m_fragId | ent.m_fragBit; unsigned signalLength = 3; // if TC has ordered scan close, it will be detected here sendSignal(scan.m_userRef, GSN_NEXT_SCANCONF, @@ -374,7 +430,7 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) if (scan.m_state == ScanOp::Next) { jam(); // look for next - scanNext(scanPtr); + scanNext(scanPtr, false); } // for reading tuple key in Current or Locked state Data pkData = c_dataBuffer; @@ -397,7 +453,7 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) lockReq->userPtr = scanPtr.i; lockReq->userRef = reference(); lockReq->tableId = scan.m_tableId; - lockReq->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff); + lockReq->fragId = frag.m_fragId | ent.m_fragBit; lockReq->fragPtrI = frag.m_accTableFragPtrI[ent.m_fragBit]; const Uint32* const buf32 = static_cast<Uint32*>(pkData); const Uint64* const buf64 = reinterpret_cast<const Uint64*>(buf32); @@ -474,13 +530,6 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) jam(); // read keys if not already done (uses signal) const TreeEnt ent = scan.m_scanEnt; - if (scan.m_keyInfo) { - jam(); - if (pkSize == 0) { - jam(); - readTablePk(frag, ent, pkData, pkSize); - } - } // conf 
signal NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); conf->scanPtr = scan.m_userPtr; @@ -496,21 +545,12 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) accLockOp = (Uint32)-1; } conf->accOperationPtr = accLockOp; - conf->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff); + conf->fragId = frag.m_fragId | ent.m_fragBit; conf->localKey[0] = getTupAddr(frag, ent); conf->localKey[1] = 0; conf->localKeyLength = 1; unsigned signalLength = 6; // add key info - if (scan.m_keyInfo) { - jam(); - conf->keyLength = pkSize; - // piggy-back first 4 words of key data - for (unsigned i = 0; i < 4; i++) { - conf->key[i] = i < pkSize ? pkData[i] : 0; - } - signalLength = 11; - } if (! scan.m_readCommitted) { sendSignal(scan.m_userRef, GSN_NEXT_SCANCONF, signal, signalLength, JBB); @@ -518,24 +558,6 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) Uint32 blockNo = refToBlock(scan.m_userRef); EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, signalLength); } - // send rest of key data - if (scan.m_keyInfo && pkSize > 4) { - unsigned total = 4; - while (total < pkSize) { - jam(); - unsigned length = pkSize - total; - if (length > 20) - length = 20; - signal->theData[0] = scan.m_userPtr; - signal->theData[1] = 0; - signal->theData[2] = 0; - signal->theData[3] = length; - memcpy(&signal->theData[4], &pkData[total], length << 2); - sendSignal(scan.m_userRef, GSN_ACC_SCAN_INFO24, - signal, 4 + length, JBB); - total += length; - } - } // next time look for next entry scan.m_state = ScanOp::Next; return; @@ -687,8 +709,10 @@ Dbtux::scanFirst(ScanOpPtr scanPtr) TreeHead& tree = frag.m_tree; // set up index keys for this operation setKeyAttrs(frag); - // unpack lower bound into c_dataBuffer - const ScanBound& bound = *scan.m_bound[0]; + // scan direction 0, 1 + const unsigned idir = scan.m_descending; + // unpack start key into c_dataBuffer + const ScanBound& bound = *scan.m_bound[idir]; ScanBoundIterator iter; bound.first(iter); for (unsigned j = 0; j < 
bound.getSize(); j++) { @@ -696,11 +720,10 @@ Dbtux::scanFirst(ScanOpPtr scanPtr) c_dataBuffer[j] = *iter.data; bound.next(iter); } - // search for scan start position TreePos treePos; - searchToScan(frag, c_dataBuffer, scan.m_boundCnt[0], treePos); + searchToScan(frag, c_dataBuffer, scan.m_boundCnt[idir], scan.m_descending, treePos); if (treePos.m_loc == NullTupLoc) { - // empty tree + // empty result set jam(); scan.m_state = ScanOp::Last; return; @@ -718,7 +741,8 @@ Dbtux::scanFirst(ScanOpPtr scanPtr) * Move to next entry. The scan is already linked to some node. When * we leave, if an entry was found, it will be linked to a possibly * different node. The scan has a position, and a direction which tells - * from where we came to this position. This is one of: + * from where we came to this position. This is one of (all comments + * are in terms of ascending scan): * * 0 - up from left child (scan this node next) * 1 - up from right child (proceed to parent) @@ -730,7 +754,7 @@ Dbtux::scanFirst(ScanOpPtr scanPtr) * re-organizations need not worry about scan direction. 
*/ void -Dbtux::scanNext(ScanOpPtr scanPtr) +Dbtux::scanNext(ScanOpPtr scanPtr, bool fromMaintReq) { ScanOp& scan = *scanPtr.p; Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI); @@ -743,8 +767,11 @@ Dbtux::scanNext(ScanOpPtr scanPtr) ndbrequire(scan.m_state != ScanOp::Locked); // set up index keys for this operation setKeyAttrs(frag); - // unpack upper bound into c_dataBuffer - const ScanBound& bound = *scan.m_bound[1]; + // scan direction + const unsigned idir = scan.m_descending; // 0, 1 + const int jdir = 1 - 2 * (int)idir; // 1, -1 + // unpack end key into c_dataBuffer + const ScanBound& bound = *scan.m_bound[1 - idir]; ScanBoundIterator iter; bound.first(iter); for (unsigned j = 0; j < bound.getSize(); j++) { @@ -764,6 +791,11 @@ Dbtux::scanNext(ScanOpPtr scanPtr) TreeEnt ent; while (true) { jam(); +#ifdef VM_TRACE + if (debugFlags & DebugScan) { + debugOut << "Scan next pos " << pos << " " << node << endl; + } +#endif if (pos.m_dir == 2) { // coming up from root ends the scan jam(); @@ -778,7 +810,7 @@ Dbtux::scanNext(ScanOpPtr scanPtr) if (pos.m_dir == 4) { // coming down from parent proceed to left child jam(); - TupLoc loc = node.getLink(0); + TupLoc loc = node.getLink(idir); if (loc != NullTupLoc) { jam(); pos.m_loc = loc; @@ -786,34 +818,42 @@ Dbtux::scanNext(ScanOpPtr scanPtr) continue; } // pretend we came from left child - pos.m_dir = 0; + pos.m_dir = idir; + } + const unsigned occup = node.getOccup(); + if (occup == 0) { + jam(); + ndbrequire(fromMaintReq); + // move back to parent - see comment in treeRemoveInner + pos.m_loc = node.getLink(2); + pos.m_dir = node.getSide(); + continue; } - if (pos.m_dir == 0) { + if (pos.m_dir == idir) { // coming up from left child scan current node jam(); - pos.m_pos = 0; + pos.m_pos = idir == 0 ? 0 : occup - 1; pos.m_match = false; pos.m_dir = 3; } if (pos.m_dir == 3) { // within node jam(); - unsigned occup = node.getOccup(); - ndbrequire(occup >= 1); // advance position if (! 
pos.m_match) pos.m_match = true; else - pos.m_pos++; + // becomes ZNIL (which is > occup) if 0 and scan descending + pos.m_pos += jdir; if (pos.m_pos < occup) { jam(); ent = node.getEnt(pos.m_pos); pos.m_dir = 3; // unchanged // read and compare all attributes readKeyAttrs(frag, ent, 0, c_entryKey); - int ret = cmpScanBound(frag, 1, c_dataBuffer, scan.m_boundCnt[1], c_entryKey); + int ret = cmpScanBound(frag, 1 - idir, c_dataBuffer, scan.m_boundCnt[1 - idir], c_entryKey); ndbrequire(ret != NdbSqlUtil::CmpUnknown); - if (ret < 0) { + if (jdir * ret < 0) { jam(); // hit upper bound of single range scan pos.m_loc = NullTupLoc; @@ -830,7 +870,7 @@ Dbtux::scanNext(ScanOpPtr scanPtr) break; } // after node proceed to right child - TupLoc loc = node.getLink(1); + TupLoc loc = node.getLink(1 - idir); if (loc != NullTupLoc) { jam(); pos.m_loc = loc; @@ -838,9 +878,9 @@ Dbtux::scanNext(ScanOpPtr scanPtr) continue; } // pretend we came from right child - pos.m_dir = 1; + pos.m_dir = 1 - idir; } - if (pos.m_dir == 1) { + if (pos.m_dir == 1 - idir) { // coming up from right child proceed to parent jam(); pos.m_loc = node.getLink(2); @@ -890,7 +930,7 @@ Dbtux::scanVisible(ScanOpPtr scanPtr, TreeEnt ent) const Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI); Uint32 fragBit = ent.m_fragBit; Uint32 tableFragPtrI = frag.m_tupTableFragPtrI[fragBit]; - Uint32 fragId = frag.m_fragId | (fragBit << frag.m_fragOff); + Uint32 fragId = frag.m_fragId | fragBit; Uint32 tupAddr = getTupAddr(frag, ent); Uint32 tupVersion = ent.m_tupVersion; // check for same tuple twice in row diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxSearch.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxSearch.cpp index 7057d74c3ad..b0e2a664bfd 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxSearch.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxSearch.cpp @@ -253,22 +253,33 @@ Dbtux::searchToRemove(Frag& frag, ConstData searchKey, TreeEnt searchEnt, TreePo /* * Search for scan start position. * - * Similar to searchToAdd. 
+ * Similar to searchToAdd. The routines differ somewhat depending on + * scan direction and are done by separate methods. */ void -Dbtux::searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePos& treePos) +Dbtux::searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, bool descending, TreePos& treePos) { const TreeHead& tree = frag.m_tree; - NodeHandle currNode(frag); - currNode.m_loc = tree.m_root; - if (currNode.m_loc == NullTupLoc) { - // empty tree - jam(); - treePos.m_match = false; + if (tree.m_root != NullTupLoc) { + if (! descending) + searchToScanAscending(frag, boundInfo, boundCount, treePos); + else + searchToScanDescending(frag, boundInfo, boundCount, treePos); return; } + // empty tree +} + +void +Dbtux::searchToScanAscending(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePos& treePos) +{ + const TreeHead& tree = frag.m_tree; + NodeHandle currNode(frag); + currNode.m_loc = tree.m_root; NodeHandle glbNode(frag); // potential g.l.b of final node NodeHandle bottomNode(frag); + // always before entry + treePos.m_match = false; while (true) { jam(); selectNode(currNode, currNode.m_loc); @@ -283,6 +294,7 @@ Dbtux::searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePo ndbrequire(ret != NdbSqlUtil::CmpUnknown); } if (ret < 0) { + // bound is left of this node jam(); const TupLoc loc = currNode.getLink(0); if (loc != NullTupLoc) { @@ -300,11 +312,11 @@ Dbtux::searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePo // start scanning this node treePos.m_loc = currNode.m_loc; treePos.m_pos = 0; - treePos.m_match = false; treePos.m_dir = 3; return; } } else if (ret > 0) { + // bound is at or right of this node jam(); const TupLoc loc = currNode.getLink(1); if (loc != NullTupLoc) { @@ -316,7 +328,7 @@ Dbtux::searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePo continue; } } else { - ndbassert(false); + ndbrequire(false); } break; } @@ -328,20 +340,19 @@ 
Dbtux::searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePo ret = cmpScanBound(frag, 0, boundInfo, boundCount, c_entryKey); ndbrequire(ret != NdbSqlUtil::CmpUnknown); if (ret < 0) { - // start scanning from current entry + // found first entry satisfying the bound treePos.m_loc = currNode.m_loc; treePos.m_pos = j; - treePos.m_match = false; treePos.m_dir = 3; return; } } + // bound is to right of this node if (! bottomNode.isNull()) { jam(); // start scanning the l.u.b treePos.m_loc = bottomNode.m_loc; treePos.m_pos = 0; - treePos.m_match = false; treePos.m_dir = 3; return; } @@ -349,3 +360,90 @@ Dbtux::searchToScan(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePo treePos.m_loc = currNode.m_loc; treePos.m_dir = 1; } + +void +Dbtux::searchToScanDescending(Frag& frag, ConstData boundInfo, unsigned boundCount, TreePos& treePos) +{ + const TreeHead& tree = frag.m_tree; + NodeHandle currNode(frag); + currNode.m_loc = tree.m_root; + NodeHandle glbNode(frag); // potential g.l.b of final node + NodeHandle bottomNode(frag); + // always before entry + treePos.m_match = false; + while (true) { + jam(); + selectNode(currNode, currNode.m_loc); + int ret; + // compare prefix + ret = cmpScanBound(frag, 1, boundInfo, boundCount, currNode.getPref(), tree.m_prefSize); + if (ret == NdbSqlUtil::CmpUnknown) { + jam(); + // read and compare all attributes + readKeyAttrs(frag, currNode.getMinMax(0), 0, c_entryKey); + ret = cmpScanBound(frag, 1, boundInfo, boundCount, c_entryKey); + ndbrequire(ret != NdbSqlUtil::CmpUnknown); + } + if (ret < 0) { + // bound is left of this node + jam(); + const TupLoc loc = currNode.getLink(0); + if (loc != NullTupLoc) { + jam(); + // continue to left subtree + currNode.m_loc = loc; + continue; + } + if (! 
glbNode.isNull()) { + jam(); + // move up to the g.l.b but remember the bottom node + bottomNode = currNode; + currNode = glbNode; + } else { + // empty result set + return; + } + } else if (ret > 0) { + // bound is at or right of this node + jam(); + const TupLoc loc = currNode.getLink(1); + if (loc != NullTupLoc) { + jam(); + // save potential g.l.b + glbNode = currNode; + // continue to right subtree + currNode.m_loc = loc; + continue; + } + } else { + ndbrequire(false); + } + break; + } + for (unsigned j = 0, occup = currNode.getOccup(); j < occup; j++) { + jam(); + int ret; + // read and compare attributes + readKeyAttrs(frag, currNode.getEnt(j), 0, c_entryKey); + ret = cmpScanBound(frag, 1, boundInfo, boundCount, c_entryKey); + ndbrequire(ret != NdbSqlUtil::CmpUnknown); + if (ret < 0) { + if (j > 0) { + // start scanning from previous entry + treePos.m_loc = currNode.m_loc; + treePos.m_pos = j - 1; + treePos.m_dir = 3; + return; + } + // start scanning upwards (pretend we came from left child) + treePos.m_loc = currNode.m_loc; + treePos.m_pos = 0; + treePos.m_dir = 0; + return; + } + } + // start scanning this node + treePos.m_loc = currNode.m_loc; + treePos.m_pos = currNode.getOccup() - 1; + treePos.m_dir = 3; +} diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp index b9e3b593a00..5107a8d8e31 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp @@ -226,6 +226,9 @@ Dbtux::treeRemoveInner(Frag& frag, NodeHandle lubNode, unsigned pos) // borrow max entry from semi/leaf Uint32 scanList = RNIL; nodePopDown(glbNode, glbNode.getOccup() - 1, ent, &scanList); + // g.l.b may be empty now + // a descending scan may try to enter the empty g.l.b + // we prevent this in scanNext nodePopUp(lubNode, pos, ent, scanList); if (glbNode.getLink(0) != NullTupLoc) { jam(); diff --git a/ndb/src/kernel/blocks/dbtux/Times.txt b/ndb/src/kernel/blocks/dbtux/Times.txt index 
1e6d0a0a329..68120084846 100644 --- a/ndb/src/kernel/blocks/dbtux/Times.txt +++ b/ndb/src/kernel/blocks/dbtux/Times.txt @@ -138,6 +138,14 @@ before mc02/c 5 ms 13 ms 126 pct after mc02/c 5 ms 10 ms 70 pct mc02/d 178 ms 242 ms 69 pct -[ prelim preformance fix for max batch size 16 -> 992 ] +[ prelim performance fix for max batch size 16 -> 992 ] + +wl-2066 mc02/c 5 ms 10 ms 87 pct +before mc02/d 140 ms 237 ms 69 pct + +wl-2066 mc02/c 5 ms 10 ms 69 pct +after mc02/d 150 ms 229 ms 52 pct + +[ wl-2066 = remove ACC storage, use TUX test to see effect ] vim: set et: diff --git a/ndb/src/kernel/blocks/dbutil/DbUtil.cpp b/ndb/src/kernel/blocks/dbutil/DbUtil.cpp index b94bb8e6d7e..0f45c407d83 100644 --- a/ndb/src/kernel/blocks/dbutil/DbUtil.cpp +++ b/ndb/src/kernel/blocks/dbutil/DbUtil.cpp @@ -60,6 +60,7 @@ DbUtil::DbUtil(const Configuration & conf) : BLOCK_CONSTRUCTOR(DbUtil); // Add received signals + addRecSignal(GSN_READ_CONFIG_REQ, &DbUtil::execREAD_CONFIG_REQ); addRecSignal(GSN_STTOR, &DbUtil::execSTTOR); addRecSignal(GSN_NDB_STTOR, &DbUtil::execNDB_STTOR); addRecSignal(GSN_DUMP_STATE_ORD, &DbUtil::execDUMP_STATE_ORD); @@ -111,47 +112,6 @@ DbUtil::DbUtil(const Configuration & conf) : addRecSignal(GSN_UTIL_RELEASE_REQ, &DbUtil::execUTIL_RELEASE_REQ); addRecSignal(GSN_UTIL_RELEASE_CONF, &DbUtil::execUTIL_RELEASE_CONF); addRecSignal(GSN_UTIL_RELEASE_REF, &DbUtil::execUTIL_RELEASE_REF); - - c_pagePool.setSize(10); - c_preparePool.setSize(1); // one parallel prepare at a time - c_preparedOperationPool.setSize(5); // three hardcoded, two for test - c_operationPool.setSize(64); // 64 parallel operations - c_transactionPool.setSize(32); // 16 parallel transactions - c_attrMappingPool.setSize(100); - c_dataBufPool.setSize(6000); // 6000*11*4 = 264K > 8k+8k*16 = 256k - { - SLList<Prepare> tmp(c_preparePool); - PreparePtr ptr; - while(tmp.seize(ptr)) - new (ptr.p) Prepare(c_pagePool); - tmp.release(); - } - { - SLList<Operation> tmp(c_operationPool); - OperationPtr ptr; - 
while(tmp.seize(ptr)) - new (ptr.p) Operation(c_dataBufPool, c_dataBufPool, c_dataBufPool); - tmp.release(); - } - { - SLList<PreparedOperation> tmp(c_preparedOperationPool); - PreparedOperationPtr ptr; - while(tmp.seize(ptr)) - new (ptr.p) PreparedOperation(c_attrMappingPool, - c_dataBufPool, c_dataBufPool); - tmp.release(); - } - { - SLList<Transaction> tmp(c_transactionPool); - TransactionPtr ptr; - while(tmp.seize(ptr)) - new (ptr.p) Transaction(c_pagePool, c_operationPool); - tmp.release(); - } - - c_lockQueuePool.setSize(5); - c_lockElementPool.setSize(5); - c_lockQueues.setSize(8); } DbUtil::~DbUtil() @@ -197,6 +157,68 @@ DbUtil::releaseTransaction(TransactionPtr transPtr){ c_runningTransactions.release(transPtr); } +void +DbUtil::execREAD_CONFIG_REQ(Signal* signal) +{ + jamEntry(); + + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + c_pagePool.setSize(10); + c_preparePool.setSize(1); // one parallel prepare at a time + c_preparedOperationPool.setSize(5); // three hardcoded, two for test + c_operationPool.setSize(64); // 64 parallel operations + c_transactionPool.setSize(32); // 16 parallel transactions + c_attrMappingPool.setSize(100); + c_dataBufPool.setSize(6000); // 6000*11*4 = 264K > 8k+8k*16 = 256k + { + SLList<Prepare> tmp(c_preparePool); + PreparePtr ptr; + while(tmp.seize(ptr)) + new (ptr.p) Prepare(c_pagePool); + tmp.release(); + } + { + SLList<Operation> tmp(c_operationPool); + OperationPtr ptr; + while(tmp.seize(ptr)) + new (ptr.p) Operation(c_dataBufPool, c_dataBufPool, c_dataBufPool); + tmp.release(); + } + { + SLList<PreparedOperation> tmp(c_preparedOperationPool); + PreparedOperationPtr ptr; + while(tmp.seize(ptr)) + new (ptr.p) PreparedOperation(c_attrMappingPool, + c_dataBufPool, c_dataBufPool); + tmp.release(); + } + { + 
SLList<Transaction> tmp(c_transactionPool); + TransactionPtr ptr; + while(tmp.seize(ptr)) + new (ptr.p) Transaction(c_pagePool, c_operationPool); + tmp.release(); + } + + c_lockQueuePool.setSize(5); + c_lockElementPool.setSize(5); + c_lockQueues.setSize(8); + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); +} + void DbUtil::execSTTOR(Signal* signal) { diff --git a/ndb/src/kernel/blocks/dbutil/DbUtil.hpp b/ndb/src/kernel/blocks/dbutil/DbUtil.hpp index 5499970fde3..983dd4402a4 100644 --- a/ndb/src/kernel/blocks/dbutil/DbUtil.hpp +++ b/ndb/src/kernel/blocks/dbutil/DbUtil.hpp @@ -69,6 +69,7 @@ protected: /** * Startup & Misc */ + void execREAD_CONFIG_REQ(Signal* signal); void execSTTOR(Signal* signal); void execNDB_STTOR(Signal* signal); void execDUMP_STATE_ORD(Signal* signal); diff --git a/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp b/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp index ae40a7c4581..7aa5be7a3cb 100644 --- a/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp +++ b/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp @@ -173,6 +173,7 @@ private: // Received signals void execDUMP_STATE_ORD(Signal* signal); + void execREAD_CONFIG_REQ(Signal* signal); void execSTTOR(Signal* signal); void execTCSEIZECONF(Signal* signal); void execTCSEIZEREF(Signal* signal); @@ -225,7 +226,7 @@ private: CheckNodeGroups::Output checkNodeGroups(Signal*, const NdbNodeBitmask &); // Generated statement blocks - void systemErrorLab(Signal* signal); + void systemErrorLab(Signal* signal, int line); void createSystableLab(Signal* signal, unsigned index); void crSystab7Lab(Signal* signal); diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp index f9414eb8848..08251348b2b 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp @@ 
-63,6 +63,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf): // Received signals addRecSignal(GSN_DUMP_STATE_ORD, &Ndbcntr::execDUMP_STATE_ORD); + addRecSignal(GSN_READ_CONFIG_REQ, &Ndbcntr::execREAD_CONFIG_REQ); addRecSignal(GSN_STTOR, &Ndbcntr::execSTTOR); addRecSignal(GSN_TCSEIZECONF, &Ndbcntr::execTCSEIZECONF); addRecSignal(GSN_TCSEIZEREF, &Ndbcntr::execTCSEIZEREF); diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index 5a841d6f836..e6bb4d4f14f 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -51,6 +51,10 @@ #include <NdbOut.hpp> #include <NdbTick.h> +// used during shutdown for reporting current startphase +// accessed from Emulator.cpp, NdbShutdown() +Uint32 g_currentStartPhase; + /** * ALL_BLOCKS Used during start phases and while changing node state * @@ -83,6 +87,24 @@ static BlockInfo ALL_BLOCKS[] = { static const Uint32 ALL_BLOCKS_SZ = sizeof(ALL_BLOCKS)/sizeof(BlockInfo); +static BlockReference readConfigOrder[ALL_BLOCKS_SZ] = { + DBTUP_REF, + DBACC_REF, + DBTC_REF, + DBLQH_REF, + DBTUX_REF, + DBDICT_REF, + DBDIH_REF, + NDBFS_REF, + NDBCNTR_REF, + QMGR_REF, + CMVMI_REF, + TRIX_REF, + BACKUP_REF, + DBUTIL_REF, + SUMA_REF +}; + /*******************************/ /* CONTINUEB */ /*******************************/ @@ -119,7 +141,7 @@ void Ndbcntr::execCONTINUEB(Signal* signal) else tmp.appfmt(" %d", to_3); - progError(__LINE__, ERR_SYSTEM_ERROR, tmp.c_str()); + progError(__LINE__, NDBD_EXIT_RESTART_TIMEOUT, tmp.c_str()); } signal->theData[0] = ZSTARTUP; @@ -132,7 +154,7 @@ void Ndbcntr::execCONTINUEB(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -150,13 +172,6 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal) jamEntry(); switch (sysErr->errorCode){ - case SystemError::StartInProgressError: - BaseString::snprintf(buf, sizeof(buf), - "Node %d killed 
this node because " - "master start in progress error", - killingNode); - break; - case SystemError::GCPStopDetected: BaseString::snprintf(buf, sizeof(buf), "Node %d killed this node because " @@ -164,20 +179,6 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal) killingNode); break; - case SystemError::ScanfragTimeout: - BaseString::snprintf(buf, sizeof(buf), - "Node %d killed this node because " - "a fragment scan timed out and could not be stopped", - killingNode); - break; - - case SystemError::ScanfragStateError: - BaseString::snprintf(buf, sizeof(buf), - "Node %d killed this node because " - "the state of a fragment scan was out of sync.", - killingNode); - break; - case SystemError::CopyFragRefError: BaseString::snprintf(buf, sizeof(buf), "Node %d killed this node because " @@ -193,12 +194,31 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal) break; } - progError(__LINE__, - ERR_SYSTEM_ERROR, - buf); + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, buf); return; }//Ndbcntr::execSYSTEM_ERROR() +void +Ndbcntr::execREAD_CONFIG_REQ(Signal* signal) +{ + jamEntry(); + + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); +} + void Ndbcntr::execSTTOR(Signal* signal) { jamEntry(); @@ -322,7 +342,7 @@ void Ndbcntr::execNDB_STTORRY(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; break; }//switch @@ -362,7 +382,7 @@ void Ndbcntr::startPhase1Lab(Signal* signal) void Ndbcntr::execREAD_NODESREF(Signal* signal) { jamEntry(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; 
}//Ndbcntr::execREAD_NODESREF() @@ -373,7 +393,7 @@ void Ndbcntr::execREAD_NODESREF(Signal* signal) void Ndbcntr::execNDB_STARTREF(Signal* signal) { jamEntry(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Ndbcntr::execNDB_STARTREF() @@ -507,6 +527,9 @@ Ndbcntr::execCNTR_START_REF(Signal * signal){ cmasterNodeId = ref->masterNodeId; sendCntrStartReq(signal); return; + case CntrStartRef::StopInProgress: + jam(); + progError(__LINE__, NDBD_EXIT_RESTART_DURING_SHUTDOWN); } ndbrequire(false); } @@ -568,6 +591,13 @@ Ndbcntr::execCNTR_START_REP(Signal* signal){ Uint32 nodeId = signal->theData[0]; c_startedNodes.set(nodeId); c_start.m_starting.clear(nodeId); + + /** + * Inform all interested blocks that node has started + */ + for(Uint32 i = 0; i<ALL_BLOCKS_SZ; i++){ + sendSignal(ALL_BLOCKS[i].Ref, GSN_NODE_START_REP, signal, 1, JBB); + } if(!c_start.m_starting.isclear()){ jam(); @@ -797,17 +827,9 @@ Ndbcntr::trySystemRestart(Signal* signal){ return false; } - if(!allNodes && c_start.m_startPartialTimeout > now){ - jam(); - return false; - } - NodeState::StartType srType = NodeState::ST_SYSTEM_RESTART; - if(c_start.m_waiting.equal(c_start.m_withoutLog)){ - if(!allNodes){ - jam(); - return false; - } + if(c_start.m_waiting.equal(c_start.m_withoutLog)) + { jam(); srType = NodeState::ST_INITIAL_START; c_start.m_starting = c_start.m_withoutLog; // Used for starting... 
@@ -837,10 +859,6 @@ Ndbcntr::trySystemRestart(Signal* signal){ ndbrequire(false); // All nodes -> partitioning, which is not allowed } - if(c_start.m_startPartitionedTimeout > now){ - jam(); - return false; - } break; } @@ -1347,7 +1365,7 @@ void Ndbcntr::execCNTR_WAITREP(Signal* signal) break; default: jam(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); break; }//switch }//Ndbcntr::execCNTR_WAITREP() @@ -1403,22 +1421,19 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) const bool tStartConf = (phase > 2) || (phase == 2 && cndbBlocksCount > 0); if(tMasterFailed){ - progError(__LINE__, - ERR_SR_OTHERNODEFAILED, + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, "Unhandled node failure during restart"); } if(tStartConf && tStarting){ // One of other starting nodes has crashed... - progError(__LINE__, - ERR_SR_OTHERNODEFAILED, + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, "Unhandled node failure of starting node during restart"); } if(tStartConf && tStarted){ // One of other started nodes has crashed... 
- progError(__LINE__, - ERR_SR_OTHERNODEFAILED, + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, "Unhandled node failure of started node during restart"); } @@ -1453,9 +1468,6 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) sendSignal(BACKUP_REF, GSN_NODE_FAILREP, signal, NodeFailRep::SignalLength, JBB); - sendSignal(SUMA_REF, GSN_NODE_FAILREP, signal, - NodeFailRep::SignalLength, JBB); - if (c_stopRec.stopReq.senderRef) { jam(); @@ -1516,7 +1528,7 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) } } - signal->theData[0] = EventReport::NODE_FAILREP; + signal->theData[0] = NDB_LE_NODE_FAILREP; signal->theData[2] = 0; Uint32 nodeId = 0; @@ -1588,9 +1600,9 @@ void Ndbcntr::execREAD_NODESREQ(Signal* signal) /*----------------------------------------------------------------------*/ // SENDS APPL_ERROR TO QMGR AND THEN SET A POINTER OUT OF BOUNDS /*----------------------------------------------------------------------*/ -void Ndbcntr::systemErrorLab(Signal* signal) +void Ndbcntr::systemErrorLab(Signal* signal, int line) { - progError(0, 0); /* BUG INSERTION */ + progError(line, NDBD_EXIT_NDBREQUIRE); /* BUG INSERTION */ return; }//Ndbcntr::systemErrorLab() @@ -1599,10 +1611,9 @@ void Ndbcntr::systemErrorLab(Signal* signal) /* |-2048| # 1 00000001 | */ /* | : | : | */ /* | -1 | # 1 00000001 | */ -/* | 0 | 0 | */ -/* | 1 | 0 | */ -/* | : | : | */ -/* | 2047| 0 | */ +/* | 1 | 0 | tupleid sequence now created on first use */ +/* | : | : | v */ +/* | 2048| 0 | v */ /*---------------------------------------------------------------------------*/ void Ndbcntr::createSystableLab(Signal* signal, unsigned index) { @@ -1637,13 +1648,9 @@ void Ndbcntr::createSystableLab(Signal* signal, unsigned index) ndbassert(column.pos == i); w.add(DictTabInfo::AttributeName, column.name); w.add(DictTabInfo::AttributeId, (Uint32)column.pos); - //w.add(DictTabInfo::AttributeType, DictTabInfo::UnSignedType); - //w.add(DictTabInfo::AttributeSize, DictTabInfo::a32Bit); - 
//w.add(DictTabInfo::AttributeArraySize, 1); w.add(DictTabInfo::AttributeKeyFlag, (Uint32)column.keyFlag); //w.add(DictTabInfo::AttributeStorage, (Uint32)DictTabInfo::MainMemory); w.add(DictTabInfo::AttributeNullableFlag, (Uint32)column.nullable); - // ext type overrides w.add(DictTabInfo::AttributeExtType, (Uint32)column.type); w.add(DictTabInfo::AttributeExtLength, (Uint32)column.length); w.add(DictTabInfo::AttributeEnd, (Uint32)true); @@ -1666,7 +1673,7 @@ void Ndbcntr::createSystableLab(Signal* signal, unsigned index) void Ndbcntr::execCREATE_TABLE_REF(Signal* signal) { jamEntry(); - progError(0,0); + progError(__LINE__,NDBD_EXIT_NDBREQUIRE, "CREATE_TABLE_REF"); return; }//Ndbcntr::execDICTTABREF() @@ -1815,8 +1822,7 @@ void Ndbcntr::crSystab8Lab(Signal* signal) jam(); ckey = 1; ctransidPhase = ZFALSE; - crSystab7Lab(signal); - return; + // skip 2nd loop - tupleid sequence now created on first use }//if signal->theData[0] = ctcConnectionP; signal->theData[1] = reference(); @@ -1867,28 +1873,28 @@ void Ndbcntr::execGETGCICONF(Signal* signal) void Ndbcntr::execTCKEYREF(Signal* signal) { jamEntry(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Ndbcntr::execTCKEYREF() void Ndbcntr::execTCROLLBACKREP(Signal* signal) { jamEntry(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Ndbcntr::execTCROLLBACKREP() void Ndbcntr::execTCRELEASEREF(Signal* signal) { jamEntry(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Ndbcntr::execTCRELEASEREF() void Ndbcntr::execTCSEIZEREF(Signal* signal) { jamEntry(); - systemErrorLab(signal); + systemErrorLab(signal, __LINE__); return; }//Ndbcntr::execTCSEIZEREF() @@ -2054,7 +2060,7 @@ Ndbcntr::execRESUME_REQ(Signal* signal){ jamEntry(); - signal->theData[0] = EventReport::SingleUser; + signal->theData[0] = NDB_LE_SingleUser; signal->theData[1] = 2; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -2099,7 +2105,9 @@ Ndbcntr::execSTOP_REQ(Signal* 
signal){ return; } - if(c_stopRec.stopReq.senderRef != 0){ + if(c_stopRec.stopReq.senderRef != 0 || + (cmasterNodeId == getOwnNodeId() && !c_start.m_starting.isclear())) + { /** * Requested a system shutdown */ @@ -2113,11 +2121,13 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ /** * Requested a node shutdown */ - if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) + if(c_stopRec.stopReq.senderRef && + StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) ref->errorCode = StopRef::SystemShutdownInProgress; else ref->errorCode = StopRef::NodeShutdownInProgress; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); @@ -2129,6 +2139,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ jam(); ref->errorCode = StopRef::UnsupportedNodeShutdown; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; @@ -2139,6 +2150,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ jam(); ref->errorCode = StopRef::MultiNodeShutdownNotMaster; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; @@ -2186,13 +2198,13 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ jam(); return; } - signal->theData[0] = EventReport::NDBStopStarted; + signal->theData[0] = NDB_LE_NDBStopStarted; signal->theData[1] = StopReq::getSystemStop(c_stopRec.stopReq.requestInfo) ? 
1 : 0; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); } else { - signal->theData[0] = EventReport::SingleUser; + signal->theData[0] = NDB_LE_SingleUser; signal->theData[1] = 0; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); } @@ -2282,6 +2294,7 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ ref->senderData = stopReq.senderData; ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash; + ref->masterNodeId = cntr.cmasterNodeId; const BlockReference bref = stopReq.senderRef; if (bref != RNIL) @@ -2295,7 +2308,7 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ cntr.updateNodeState(signal, newState); } - signal->theData[0] = EventReport::NDBStopAborted; + signal->theData[0] = NDB_LE_NDBStopAborted; cntr.sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB); return false; @@ -2403,7 +2416,7 @@ void Ndbcntr::execABORT_ALL_CONF(Signal* signal){ c_stopRec.stopReq.senderRef = 0; // the command is done - signal->theData[0] = EventReport::SingleUser; + signal->theData[0] = NDB_LE_SingleUser; signal->theData[1] = 1; signal->theData[2] = c_stopRec.stopReq.singleUserApi; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); @@ -2430,6 +2443,7 @@ void Ndbcntr::execABORT_ALL_REF(Signal* signal){ StopRef * const stopRef = (StopRef *)&signal->theData[0]; stopRef->senderData = c_stopRec.stopReq.senderData; stopRef->errorCode = StopRef::TransactionAbortFailed; + stopRef->masterNodeId = cmasterNodeId; sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); } @@ -2691,7 +2705,7 @@ Ndbcntr::execFSREMOVECONF(Signal* signal){ } void Ndbcntr::Missra::execSTART_ORD(Signal* signal){ - signal->theData[0] = EventReport::NDBStartStarted; + signal->theData[0] = NDB_LE_NDBStartStarted; signal->theData[1] = NDB_VERSION; cntr.sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -2709,7 +2723,7 @@ void Ndbcntr::Missra::sendNextREAD_CONFIG_REQ(Signal* signal){ req->senderRef = cntr.reference(); req->noOfParameters = 0; - const 
BlockReference ref = ALL_BLOCKS[currentBlockIndex].Ref; + const BlockReference ref = readConfigOrder[currentBlockIndex]; #if 0 ndbout_c("sending READ_CONFIG_REQ to %s(ref=%x index=%d)", @@ -2740,7 +2754,8 @@ void Ndbcntr::Missra::execREAD_CONFIG_CONF(Signal* signal){ const ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtr(); const Uint32 ref = conf->senderRef; - ndbrequire(refToBlock(ALL_BLOCKS[currentBlockIndex].Ref) == refToBlock(ref)); + ndbrequire(refToBlock(readConfigOrder[currentBlockIndex]) + == refToBlock(ref)); currentBlockIndex++; sendNextREAD_CONFIG_REQ(signal); @@ -2768,11 +2783,12 @@ void Ndbcntr::Missra::execSTTORRY(Signal* signal){ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){ - for(; currentStartPhase < 255 ; currentStartPhase++){ + for(; currentStartPhase < 255 ; + currentStartPhase++, g_currentStartPhase = currentStartPhase){ jam(); const Uint32 start = currentBlockIndex; - + if (currentStartPhase == ZSTART_PHASE_6) { // Ndbd has passed the critical startphases. 
@@ -2821,14 +2837,14 @@ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){ * At least one wanted this start phase, report it */ jam(); - signal->theData[0] = EventReport::StartPhaseCompleted; + signal->theData[0] = NDB_LE_StartPhaseCompleted; signal->theData[1] = currentStartPhase; signal->theData[2] = cntr.ctypeOfStart; cntr.sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); } } - signal->theData[0] = EventReport::NDBStartCompleted; + signal->theData[0] = NDB_LE_NDBStartCompleted; signal->theData[1] = NDB_VERSION; cntr.sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -2919,7 +2935,8 @@ UpgradeStartup::execCM_APPCHG(SimulatedBlock & block, Signal* signal){ return; } } - block.progError(0,0); + block.progError(__LINE__,NDBD_EXIT_NDBREQUIRE, + "UpgradeStartup::execCM_APPCHG"); } void @@ -2932,7 +2949,9 @@ UpgradeStartup::sendCntrMasterReq(Ndbcntr& cntr, Signal* signal, Uint32 n){ } if(node == NdbNodeBitmask::NotFound){ - cntr.progError(0,0); + cntr.progError(__LINE__,NDBD_EXIT_NDBREQUIRE, + "UpgradeStartup::sendCntrMasterReq " + "NdbNodeBitmask::NotFound"); } CntrMasterReq * const cntrMasterReq = (CntrMasterReq*)&signal->theData[0]; @@ -2974,5 +2993,6 @@ UpgradeStartup::execCNTR_MASTER_REPLY(SimulatedBlock & block, Signal* signal){ } } } - block.progError(0,0); + block.progError(__LINE__,NDBD_EXIT_NDBREQUIRE, + "UpgradeStartup::execCNTR_MASTER_REPLY"); } diff --git a/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp b/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp index c0ffa722f1c..ddf16024017 100644 --- a/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp +++ b/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp @@ -18,12 +18,11 @@ #include <my_sys.h> #include <my_pthread.h> -#include <Error.hpp> #include "AsyncFile.hpp" #include <ErrorHandlingMacros.hpp> #include <kernel_types.h> -#include <NdbMem.h> +#include <ndbd_malloc.hpp> #include <NdbThread.h> #include <signaldata/FsOpenReq.hpp> @@ -96,6 +95,7 @@ AsyncFile::AsyncFile() : theReportTo(0), theMemoryChannelPtr(NULL) { + 
m_current_request= m_last_request= 0; } void @@ -161,7 +161,7 @@ AsyncFile::run() theStartFlag = true; // Create write buffer for bigger writes theWriteBufferSize = WRITEBUFFERSIZE; - theWriteBuffer = (char *) NdbMem_Allocate(theWriteBufferSize); + theWriteBuffer = (char *) ndbd_malloc(theWriteBufferSize); NdbMutex_Unlock(theStartMutexPtr); NdbCondition_Signal(theStartConditionPtr); @@ -177,6 +177,7 @@ AsyncFile::run() endReq(); return; }//if + m_current_request= request; switch (request->action) { case Request:: open: openReq(request); @@ -226,6 +227,8 @@ AsyncFile::run() abort(); break; }//switch + m_last_request= request; + m_current_request= 0; // No need to signal as ndbfs only uses tryRead theReportTo->writeChannelNoSignal(request); @@ -509,7 +512,7 @@ AsyncFile::extendfile(Request* request) { DEBUG(ndbout_c("extendfile: maxOffset=%d, size=%d", maxOffset, maxSize)); // Allocate a buffer and fill it with zeros - void* pbuf = NdbMem_Allocate(maxSize); + void* pbuf = ndbd_malloc(maxSize); memset(pbuf, 0, maxSize); for (int p = 0; p <= maxOffset; p = p + maxSize) { int return_value; @@ -517,16 +520,18 @@ AsyncFile::extendfile(Request* request) { p, SEEK_SET); if((return_value == -1 ) || (return_value != p)) { + ndbd_free(pbuf,maxSize); return -1; } return_value = ::write(theFd, pbuf, maxSize); if ((return_value == -1) || (return_value != maxSize)) { + ndbd_free(pbuf,maxSize); return -1; } } - free(pbuf); + ndbd_free(pbuf,maxSize); DEBUG(ndbout_c("extendfile: \"%s\" OK!", theFileName.c_str())); return 0; @@ -876,7 +881,7 @@ AsyncFile::rmrfReq(Request * request, char * path, bool removePath){ void AsyncFile::endReq() { // Thread is ended with return - if (theWriteBuffer) NdbMem_Free(theWriteBuffer); + if (theWriteBuffer) ndbd_free(theWriteBuffer, theWriteBufferSize); } @@ -1033,3 +1038,60 @@ void printErrorAndFlags(Uint32 used_flags) { } #endif + +NdbOut& +operator<<(NdbOut& out, const Request& req) +{ + out << "[ Request: file: " << hex << req.file + << " userRef: 
" << hex << req.theUserReference + << " userData: " << dec << req.theUserPointer + << " theFilePointer: " << req.theFilePointer + << " action: "; + switch(req.action){ + case Request::open: + out << "open"; + break; + case Request::close: + out << "close"; + break; + case Request::closeRemove: + out << "closeRemove"; + break; + case Request::read: // Allways leave readv directly after + out << "read"; + break; + case Request::readv: + out << "readv"; + break; + case Request::write:// Allways leave writev directly after + out << "write"; + break; + case Request::writev: + out << "writev"; + break; + case Request::writeSync:// Allways leave writevSync directly after + out << "writeSync"; + break; + // writeSync because SimblockAsyncFileSystem depends on it + case Request::writevSync: + out << "writevSync"; + break; + case Request::sync: + out << "sync"; + break; + case Request::end: + out << "end"; + break; + case Request::append: + out << "append"; + break; + case Request::rmrf: + out << "rmrf"; + break; + default: + out << (Uint32)req.action; + break; + } + out << " ]"; + return out; +} diff --git a/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp b/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp index 2176c93c5d5..997bf40fe2a 100644 --- a/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp +++ b/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp @@ -160,6 +160,7 @@ public: Uint32 theTrace; }; +NdbOut& operator <<(NdbOut&, const Request&); inline void @@ -173,6 +174,7 @@ Request::set(BlockReference userReference, class AsyncFile { + friend class Ndbfs; public: AsyncFile(); ~AsyncFile(); @@ -188,6 +190,7 @@ public: bool isOpen(); Filename theFileName; + Request *m_current_request, *m_last_request; private: void openReq(Request *request); diff --git a/ndb/src/kernel/blocks/ndbfs/CircularIndex.hpp b/ndb/src/kernel/blocks/ndbfs/CircularIndex.hpp index 349cccdbcb4..460ad3f614a 100644 --- a/ndb/src/kernel/blocks/ndbfs/CircularIndex.hpp +++ b/ndb/src/kernel/blocks/ndbfs/CircularIndex.hpp @@ -68,7 +68,7 
@@ class CircularIndex { public: inline CircularIndex( int start= 0,int size=256 ); - operator int (); + operator int () const; CircularIndex& operator ++ (); friend int full( const CircularIndex& write, const CircularIndex& read ); friend int empty( const CircularIndex& write, const CircularIndex& read ); @@ -77,7 +77,7 @@ private: int theIndex; }; -inline CircularIndex::operator int () +inline CircularIndex::operator int () const { return theIndex; } diff --git a/ndb/src/kernel/blocks/ndbfs/Filename.cpp b/ndb/src/kernel/blocks/ndbfs/Filename.cpp index 15158ec19ef..238390f262c 100644 --- a/ndb/src/kernel/blocks/ndbfs/Filename.cpp +++ b/ndb/src/kernel/blocks/ndbfs/Filename.cpp @@ -20,7 +20,6 @@ #include "Filename.hpp" #include "ErrorHandlingMacros.hpp" -#include "Error.hpp" #include "RefConvert.hpp" #include "DebuggerNames.hpp" @@ -52,7 +51,7 @@ Filename::init(Uint32 nodeid, DBUG_ENTER("Filename::init"); if (pFileSystemPath == NULL) { - ERROR_SET(fatal, AFS_ERROR_NOPATH, ""," Filename::init()"); + ERROR_SET(fatal, NDBD_EXIT_AFS_NOPATH, "","Missing FileSystemPath"); return; } @@ -109,7 +108,7 @@ Filename::set(BlockReference blockReference, { const char* blockName = getBlockName( refToBlock(blockReference) ); if (blockName == NULL){ - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","No Block Name"); + ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","No Block Name"); return; } BaseString::snprintf(buf, sizeof(buf), "%s%s", blockName, DIR_SEPARATOR); @@ -165,7 +164,7 @@ Filename::set(BlockReference blockReference, const Uint32 diskNo = FsOpenReq::v1_getDisk(filenumber); if(diskNo == 0xFF){ - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","Invalid disk specification"); + ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","Invalid disk specification"); } BaseString::snprintf(buf, sizeof(buf), "D%d%s", diskNo, DIR_SEPARATOR); @@ -174,10 +173,10 @@ Filename::set(BlockReference blockReference, } break; default: - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","Wrong version"); + 
ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","Wrong version"); } if (type >= noOfExtensions){ - ERROR_SET(ecError, AFS_ERROR_PARAMETER,"","File Type doesn't exist"); + ERROR_SET(ecError, NDBD_EXIT_AFS_PARAMETER,"","File Type doesn't exist"); return; } strcat(theName, fileExtension[type]); diff --git a/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp b/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp index 6bb9684f3ca..f46cc66fe16 100644 --- a/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp +++ b/ndb/src/kernel/blocks/ndbfs/MemoryChannel.hpp @@ -70,7 +70,6 @@ #else #include "ErrorHandlingMacros.hpp" -#include "Error.hpp" #include "CircularIndex.hpp" #include "NdbMutex.h" #include "NdbCondition.h" @@ -97,8 +96,20 @@ private: NdbMutex* theMutexPtr; NdbCondition* theConditionPtr; + template<class U> + friend NdbOut& operator<<(NdbOut& out, const MemoryChannel<U> & chn); }; +template <class T> +NdbOut& operator<<(NdbOut& out, const MemoryChannel<T> & chn) +{ + NdbMutex_Lock(chn.theMutexPtr); + out << "[ theSize: " << chn.theSize + << " theReadIndex: " << (int)chn.theReadIndex + << " theWriteIndex: " << (int)chn.theWriteIndex << " ]"; + NdbMutex_Unlock(chn.theMutexPtr); + return out; +} template <class T> MemoryChannel<T>::MemoryChannel( int size): theSize(size), diff --git a/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp b/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp index 6f848d7fe16..5049c726315 100644 --- a/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp +++ b/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp @@ -19,7 +19,6 @@ #include "Ndbfs.hpp" #include "AsyncFile.hpp" #include "Filename.hpp" -#include "Error.hpp" #include <signaldata/FsOpenReq.hpp> #include <signaldata/FsCloseReq.hpp> @@ -57,26 +56,10 @@ Ndbfs::Ndbfs(const Configuration & conf) : theLastId(0), m_maxOpenedFiles(0) { - theFileSystemPath = conf.fileSystemPath(); - theBackupFilePath = conf.backupFilePath(); - - theRequestPool = new Pool<Request>; - - const ndb_mgm_configuration_iterator * p = conf.getOwnConfigIterator(); - ndbrequire(p != 0); - 
- m_maxFiles = 40; - ndb_mgm_get_int_parameter(p, CFG_DB_MAX_OPEN_FILES, &m_maxFiles); - - // Create idle AsyncFiles - Uint32 noIdleFiles = m_maxFiles > 27 ? 27 : m_maxFiles ; - for (Uint32 i = 0; i < noIdleFiles; i++){ - theIdleFiles.push_back(createAsyncFile()); - } - BLOCK_CONSTRUCTOR(Ndbfs); // Set received signals + addRecSignal(GSN_READ_CONFIG_REQ, &Ndbfs::execREAD_CONFIG_REQ); addRecSignal(GSN_DUMP_STATE_ORD, &Ndbfs::execDUMP_STATE_ORD); addRecSignal(GSN_STTOR, &Ndbfs::execSTTOR); addRecSignal(GSN_FSOPENREQ, &Ndbfs::execFSOPENREQ); @@ -88,6 +71,8 @@ Ndbfs::Ndbfs(const Configuration & conf) : addRecSignal(GSN_FSAPPENDREQ, &Ndbfs::execFSAPPENDREQ); addRecSignal(GSN_FSREMOVEREQ, &Ndbfs::execFSREMOVEREQ); // Set send signals + + theRequestPool = 0; } Ndbfs::~Ndbfs() @@ -102,7 +87,41 @@ Ndbfs::~Ndbfs() }//for theFiles.clear(); - delete theRequestPool; + if (theRequestPool) + delete theRequestPool; +} + +void +Ndbfs::execREAD_CONFIG_REQ(Signal* signal) +{ + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + theFileSystemPath = theConfiguration.fileSystemPath(); + theBackupFilePath = theConfiguration.backupFilePath(); + + theRequestPool = new Pool<Request>; + + m_maxFiles = 40; + ndb_mgm_get_int_parameter(p, CFG_DB_MAX_OPEN_FILES, &m_maxFiles); + + // Create idle AsyncFiles + Uint32 noIdleFiles = m_maxFiles > 27 ? 27 : m_maxFiles ; + for (Uint32 i = 0; i < noIdleFiles; i++){ + theIdleFiles.push_back(createAsyncFile()); + } + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); } /* Received a restart signal. 
@@ -557,7 +576,7 @@ Ndbfs::createAsyncFile(){ AsyncFile* file = theFiles[i]; ndbout_c("%2d (0x%x): %s", i, file, file->isOpen()?"OPEN":"CLOSED"); } - ERROR_SET(fatal, AFS_ERROR_MAXOPEN,""," Ndbfs::createAsyncFile"); + ERROR_SET(fatal, NDBD_EXIT_AFS_MAXOPEN,""," Ndbfs::createAsyncFile"); } AsyncFile* file = new AsyncFile; @@ -1006,6 +1025,30 @@ Ndbfs::execDUMP_STATE_ORD(Signal* signal) } return; } + + if(signal->theData[0] == 404) + { + ndbrequire(signal->getLength() == 2); + Uint32 file= signal->theData[1]; + AsyncFile* openFile = theOpenFiles.find(file); + ndbrequire(openFile); + ndbout_c("File: %s %p", openFile->theFileName.c_str(), openFile); + Request* curr = openFile->m_current_request; + Request* last = openFile->m_last_request; + if(curr) + ndbout << "Current request: " << *curr << endl; + if(last) + ndbout << "Last request: " << *last << endl; + + ndbout << "theReportTo " << *openFile->theReportTo << endl; + ndbout << "theMemoryChannelPtr" << *openFile->theMemoryChannelPtr << endl; + + ndbout << "All files: " << endl; + for (unsigned i = 0; i < theFiles.size(); i++){ + AsyncFile* file = theFiles[i]; + ndbout_c("%2d (0x%x): %s", i,file, file->isOpen()?"OPEN":"CLOSED"); + } + } }//Ndbfs::execDUMP_STATE_ORD() @@ -1016,3 +1059,4 @@ template class Vector<AsyncFile*>; template class Vector<OpenFiles::OpenFileItem>; template class MemoryChannel<Request>; template class Pool<Request>; +template NdbOut& operator<<(NdbOut&, const MemoryChannel<Request>&); diff --git a/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp b/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp index c5aaa4e5c49..17ce8fbd8aa 100644 --- a/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp +++ b/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp @@ -41,6 +41,7 @@ protected: BLOCK_DEFINES(Ndbfs); // The signal processing functions + void execREAD_CONFIG_REQ(Signal* signal); void execDUMP_STATE_ORD(Signal* signal); void execFSOPENREQ(Signal* signal); void execFSCLOSEREQ(Signal* signal); @@ -103,6 +104,7 @@ protected: BLOCK_DEFINES(VoidFs); // The 
signal processing functions + void execREAD_CONFIG_REQ(Signal* signal); void execDUMP_STATE_ORD(Signal* signal); void execFSOPENREQ(Signal* signal); void execFSCLOSEREQ(Signal* signal); diff --git a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp index 0fee687f1bc..eacda6ec77d 100644 --- a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp +++ b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp @@ -88,7 +88,7 @@ inline bool OpenFiles::insert(AsyncFile* file, Uint16 id){ names.assfmt("open: >%s< existing: >%s<", file->theFileName.c_str(), m_files[i].m_file->theFileName.c_str()); - ERROR_SET(fatal, AFS_ERROR_ALLREADY_OPEN, names.c_str(), + ERROR_SET(fatal, NDBD_EXIT_AFS_ALREADY_OPEN, names.c_str(), "OpenFiles::insert()"); } } diff --git a/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp b/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp index d093089acfc..5a03d8bb1a0 100644 --- a/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp +++ b/ndb/src/kernel/blocks/ndbfs/VoidFs.cpp @@ -20,7 +20,6 @@ #include "Ndbfs.hpp" #include "AsyncFile.hpp" #include "Filename.hpp" -#include "Error.hpp" #include <signaldata/FsOpenReq.hpp> #include <signaldata/FsCloseReq.hpp> @@ -45,6 +44,7 @@ VoidFs::VoidFs(const Configuration & conf) : BLOCK_CONSTRUCTOR(VoidFs); // Set received signals + addRecSignal(GSN_READ_CONFIG_REQ, &VoidFs::execREAD_CONFIG_REQ); addRecSignal(GSN_DUMP_STATE_ORD, &VoidFs::execDUMP_STATE_ORD); addRecSignal(GSN_STTOR, &VoidFs::execSTTOR); addRecSignal(GSN_FSOPENREQ, &VoidFs::execFSOPENREQ); @@ -61,6 +61,21 @@ VoidFs::~VoidFs() { } +void +VoidFs::execREAD_CONFIG_REQ(Signal* signal) +{ + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); +} + void VoidFs::execSTTOR(Signal* 
signal) { diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index 02be002cae0..e728ea81a7d 100644 --- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -50,6 +50,7 @@ #define ZAPI_HB_HANDLING 3 #define ZTIMER_HANDLING 4 #define ZARBIT_HANDLING 5 +#define ZSTART_FAILURE_LIMIT 6 /* Error Codes ------------------------------*/ #define ZERRTOOMANY 1101 @@ -113,8 +114,19 @@ public: Uint32 m_gsn; SignalCounter m_nodes; - } c_start; + Uint32 m_latest_gci; + + Uint32 m_start_type; + NdbNodeBitmask m_skip_nodes; + NdbNodeBitmask m_starting_nodes; + NdbNodeBitmask m_starting_nodes_w_log; + Uint16 m_president_candidate; + Uint32 m_president_candidate_gci; + Uint16 m_regReqReqSent; + Uint16 m_regReqReqRecv; + } c_start; + NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NodeBitmask c_connectedNodes; // All kinds of connected nodes @@ -125,15 +137,14 @@ public: * i.e. nodes that connect to use, when we already have elected president */ NdbNodeBitmask c_readnodes_nodes; - + Uint32 c_maxDynamicId; // Records struct NodeRec { UintR ndynamicId; Phase phase; - UintR alarmCount; - + QmgrState sendPrepFailReqStatus; QmgrState sendCommitFailReqStatus; QmgrState sendPresToStatus; @@ -225,6 +236,7 @@ private: void execDUMP_STATE_ORD(Signal* signal); void execCONNECT_REP(Signal* signal); void execNDB_FAILCONF(Signal* signal); + void execREAD_CONFIG_REQ(Signal* signal); void execSTTOR(Signal* signal); void execCM_INFOCONF(Signal* signal); void execCLOSE_COMCONF(Signal* signal); @@ -236,6 +248,9 @@ private: void execREAD_NODESREF(Signal* signal); void execREAD_NODESCONF(Signal* signal); + void execDIH_RESTARTREF(Signal* signal); + void execDIH_RESTARTCONF(Signal* signal); + void execAPI_VERSION_REQ(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal); @@ -252,6 +267,7 @@ private: // Statement blocks void check_readnodes_reply(Signal* signal, Uint32 nodeId, 
Uint32 gsn); + Uint32 check_startup(Signal* signal); void node_failed(Signal* signal, Uint16 aFailedNode); void checkStartInterface(Signal* signal); @@ -324,7 +340,7 @@ private: void stateArbitChoose(Signal* signal); void stateArbitCrash(Signal* signal); void computeArbitNdbMask(NodeBitmask& aMask); - void reportArbitEvent(Signal* signal, EventReport::EventType type); + void reportArbitEvent(Signal* signal, Ndb_logevent_type type); // Initialisation void initData(); @@ -374,12 +390,12 @@ private: /* Status flags ----------------------------------*/ Uint32 c_restartPartialTimeout; + Uint32 c_restartPartionedTimeout; + Uint32 c_restartFailureTimeout; + Uint64 c_start_election_time; Uint16 creadyDistCom; - Uint16 c_regReqReqSent; - Uint16 c_regReqReqRecv; - Uint64 c_stopElectionTime; - Uint16 cpresidentCandidate; + Uint16 cdelayRegreq; Uint16 cpresidentAlive; Uint16 cnoFailedNodes; diff --git a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index a8fe30d8cfa..f14cbd48695 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -35,6 +35,7 @@ void Qmgr::initData() Uint32 hbDBAPI = 500; setHbApiDelay(hbDBAPI); + c_connectedNodes.set(getOwnNodeId()); c_stopReq.senderRef = 0; }//Qmgr::initData() @@ -74,6 +75,7 @@ Qmgr::Qmgr(const class Configuration & conf) // Received signals addRecSignal(GSN_CONNECT_REP, &Qmgr::execCONNECT_REP); addRecSignal(GSN_NDB_FAILCONF, &Qmgr::execNDB_FAILCONF); + addRecSignal(GSN_READ_CONFIG_REQ, &Qmgr::execREAD_CONFIG_REQ); addRecSignal(GSN_STTOR, &Qmgr::execSTTOR); addRecSignal(GSN_CLOSE_COMCONF, &Qmgr::execCLOSE_COMCONF); addRecSignal(GSN_API_REGREQ, &Qmgr::execAPI_REGREQ); @@ -96,6 +98,9 @@ Qmgr::Qmgr(const class Configuration & conf) addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF); addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF); + + addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF); + addRecSignal(GSN_DIH_RESTARTCONF, 
&Qmgr::execDIH_RESTARTCONF); initData(); }//Qmgr::Qmgr() diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 8b7caadfeb9..cc981f37987 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -94,7 +94,7 @@ void Qmgr::execCM_HEARTBEAT(Signal* signal) jamEntry(); hbNodePtr.i = signal->theData[0]; ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec); - hbNodePtr.p->alarmCount = 0; + setNodeInfo(hbNodePtr.i).m_heartbeat_cnt= 0; return; }//Qmgr::execCM_HEARTBEAT() @@ -146,6 +146,30 @@ void Qmgr::execCONTINUEB(Signal* signal) runArbitThread(signal); return; break; + case ZSTART_FAILURE_LIMIT:{ + if (cpresident != ZNIL) + { + jam(); + return; + } + Uint64 now = NdbTick_CurrentMillisecond(); + if (now > (c_start_election_time + c_restartFailureTimeout)) + { + jam(); + BaseString tmp; + tmp.append("Shutting down node as total restart time exceeds " + " StartFailureTimeout as set in config file "); + if(c_restartFailureTimeout == ~0) + tmp.append(" 0 (inifinite)"); + else + tmp.appfmt(" %d", c_restartFailureTimeout); + + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str()); + } + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + return; + } default: jam(); // ZCOULD_NOT_OCCUR_ERROR; @@ -193,6 +217,27 @@ void Qmgr::execPRES_TOREQ(Signal* signal) return; }//Qmgr::execPRES_TOREQ() +void +Qmgr::execREAD_CONFIG_REQ(Signal* signal) +{ + jamEntry(); + + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); +} + /* 
4.2 ADD NODE MODULE*/ /*##########################################################################*/ @@ -252,14 +297,28 @@ void Qmgr::startphase1(Signal* signal) nodePtr.p->phase = ZSTARTING; nodePtr.p->blockRef = reference(); c_connectedNodes.set(nodePtr.i); + + signal->theData[0] = reference(); + sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB); + return; +} - signal->theData[0] = 0; // no answer - signal->theData[1] = 0; // no id - signal->theData[2] = NodeInfo::DB; - sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); +void +Qmgr::execDIH_RESTARTREF(Signal*signal) +{ + jamEntry(); + c_start.m_latest_gci = 0; + execCM_INFOCONF(signal); +} + +void +Qmgr::execDIH_RESTARTCONF(Signal*signal) +{ + jamEntry(); + + c_start.m_latest_gci = signal->theData[1]; execCM_INFOCONF(signal); - return; } void Qmgr::setHbDelay(UintR aHbDelay) @@ -379,6 +438,7 @@ void Qmgr::execCONNECT_REP(Signal* signal) void Qmgr::execREAD_NODESCONF(Signal* signal) { + jamEntry(); check_readnodes_reply(signal, refToNode(signal->getSendersBlockRef()), GSN_READ_NODESCONF); @@ -387,6 +447,7 @@ Qmgr::execREAD_NODESCONF(Signal* signal) void Qmgr::execREAD_NODESREF(Signal* signal) { + jamEntry(); check_readnodes_reply(signal, refToNode(signal->getSendersBlockRef()), GSN_READ_NODESREF); @@ -397,25 +458,44 @@ Qmgr::execREAD_NODESREF(Signal* signal) /*******************************/ void Qmgr::execCM_INFOCONF(Signal* signal) { + /** + * Open communcation to all DB nodes + */ + signal->theData[0] = 0; // no answer + signal->theData[1] = 0; // no id + signal->theData[2] = NodeInfo::DB; + sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); + cpresident = ZNIL; - cpresidentCandidate = getOwnNodeId(); cpresidentAlive = ZFALSE; - c_stopElectionTime = NdbTick_CurrentMillisecond(); - c_stopElectionTime += c_restartPartialTimeout; + c_start_election_time = NdbTick_CurrentMillisecond(); + + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 
1); + cmInfoconf010Lab(signal); return; }//Qmgr::execCM_INFOCONF() +Uint32 g_start_type = 0; +NdbNodeBitmask g_nowait_nodes; // Set by clo + void Qmgr::cmInfoconf010Lab(Signal* signal) { c_start.m_startKey = 0; c_start.m_startNode = getOwnNodeId(); c_start.m_nodes.clearWaitingFor(); c_start.m_gsn = GSN_CM_REGREQ; + c_start.m_starting_nodes.clear(); + c_start.m_starting_nodes_w_log.clear(); + c_start.m_regReqReqSent = 0; + c_start.m_regReqReqRecv = 0; + c_start.m_skip_nodes = g_nowait_nodes; + c_start.m_skip_nodes.bitAND(c_definedNodes); + c_start.m_start_type = g_start_type; NodeRecPtr nodePtr; - c_regReqReqSent = c_regReqReqRecv = 0; cnoOfNodes = 0; for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); @@ -450,14 +530,18 @@ void Qmgr::cmInfoconf010Lab(Signal* signal) void Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ - c_regReqReqSent++; - CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0]; - cmRegReq->blockRef = reference(); - cmRegReq->nodeId = getOwnNodeId(); - cmRegReq->version = NDB_VERSION; + CmRegReq * req = (CmRegReq *)&signal->theData[0]; + req->blockRef = reference(); + req->nodeId = getOwnNodeId(); + req->version = NDB_VERSION; + req->latest_gci = c_start.m_latest_gci; + req->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes); const Uint32 ref = calcQmgrBlockRef(nodeId); sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB); DEBUG_START(GSN_CM_REGREQ, nodeId, ""); + + c_start.m_regReqReqSent++; } /* @@ -497,6 +581,18 @@ Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ /*******************************/ /* CM_REGREQ */ /*******************************/ +static +int +check_start_type(Uint32 starting, Uint32 own) +{ + if (starting == (1 << NodeState::ST_INITIAL_START) && + ((own & (1 << NodeState::ST_INITIAL_START)) == 0)) + { + return 1; + } + return 0; +} + void Qmgr::execCM_REGREQ(Signal* signal) { DEBUG_START3(signal, ""); @@ -508,6 +604,17 @@ void 
Qmgr::execCM_REGREQ(Signal* signal) const BlockReference Tblockref = cmRegReq->blockRef; const Uint32 startingVersion = cmRegReq->version; addNodePtr.i = cmRegReq->nodeId; + Uint32 gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; + + if (signal->getLength() == CmRegReq::SignalLength) + { + jam(); + gci = cmRegReq->latest_gci; + start_type = cmRegReq->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes); + } if (creadyDistCom == ZFALSE) { jam(); @@ -521,11 +628,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - - if (cpresident != getOwnNodeId()){ + if (check_start_type(start_type, c_start.m_start_type)) + { + jam(); + sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE); + return; + } + + if (cpresident != getOwnNodeId()) + { jam(); - if (cpresident == ZNIL) { + + if (cpresident == ZNIL) + { /*** * We don't know the president. * If the node to be added has lower node id @@ -533,13 +648,18 @@ void Qmgr::execCM_REGREQ(Signal* signal) * candidate */ jam(); - if (addNodePtr.i < cpresidentCandidate) { + if (gci > c_start.m_president_candidate_gci || + (gci == c_start.m_president_candidate_gci && + addNodePtr.i < c_start.m_president_candidate)) + { jam(); - cpresidentCandidate = addNodePtr.i; - }//if + c_start.m_president_candidate = addNodePtr.i; + c_start.m_president_candidate_gci = gci; + } sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION); return; - } + } + /** * We are not the president. * We know the president. 
@@ -549,7 +669,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (c_start.m_startNode != 0){ + if (c_start.m_startNode != 0) + { jam(); /** * President busy by adding another node @@ -558,7 +679,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (ctoStatus == Q_ACTIVE) { + if (ctoStatus == Q_ACTIVE) + { jam(); /** * Active taking over as president @@ -567,7 +689,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) { + if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) + { jam(); /** * The new node is not in config file @@ -576,13 +699,15 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); Phase phase = addNodePtr.p->phase; - if (phase != ZINIT){ + if (phase != ZINIT) + { jam(); DEBUG("phase = " << phase); sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD); return; - }//if + } jam(); /** @@ -654,7 +779,12 @@ void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef, ref->blockRef = reference(); ref->nodeId = getOwnNodeId(); ref->errorCode = Terror; - ref->presidentCandidate = (cpresident == ZNIL ? cpresidentCandidate : cpresident); + ref->presidentCandidate = + (cpresident == ZNIL ? c_start.m_president_candidate : cpresident); + ref->candidate_latest_gci = c_start.m_president_candidate_gci; + ref->latest_gci = c_start.m_latest_gci; + ref->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes); sendSignal(TBRef, GSN_CM_REGREF, signal, CmRegRef::SignalLength, JBB); DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), ""); @@ -713,7 +843,7 @@ void Qmgr::execCM_REGCONF(Signal* signal) // Send this as an EVENT REPORT to inform about hearing about // other NDB node proclaiming to be president. 
/*--------------------------------------------------------------*/ - signal->theData[0] = EventReport::CM_REGCONF; + signal->theData[0] = NDB_LE_CM_REGCONF; signal->theData[1] = getOwnNodeId(); signal->theData[2] = cpresident; signal->theData[3] = TdynamicId; @@ -779,9 +909,9 @@ retry: char buf[255]; BaseString::snprintf(buf, sizeof(buf), - "Partitioned cluster! check StartPartialTimeout, " - " node %d thinks %d is president, " - " I think president is: %d", + "check StartPartialTimeout, " + "node %d thinks %d is president, " + "I think president is: %d", nodeId, president, cpresident); ndbout_c(buf); @@ -813,7 +943,7 @@ retry: CRASH_INSERTION(932); progError(__LINE__, - ERR_ARBIT_SHUTDOWN, + NDBD_EXIT_PARTITIONED_SHUTDOWN, buf); ndbrequire(false); @@ -848,28 +978,105 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ /*******************************/ /* CM_REGREF */ /*******************************/ +static +const char * +get_start_type_string(Uint32 st) +{ + static char buf[256]; + + if (st == 0) + { + return "<ANY>"; + } + else + { + buf[0] = 0; + for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++) + { + if (st & (1 << i)) + { + if (buf[0]) + strcat(buf, "/"); + switch(i){ + case NodeState::ST_INITIAL_START: + strcat(buf, "inital start"); + break; + case NodeState::ST_SYSTEM_RESTART: + strcat(buf, "system restart"); + break; + case NodeState::ST_NODE_RESTART: + strcat(buf, "node restart"); + break; + case NodeState::ST_INITIAL_NODE_RESTART: + strcat(buf, "initial node restart"); + break; + } + } + } + return buf; + } +} + void Qmgr::execCM_REGREF(Signal* signal) { jamEntry(); - UintR TaddNodeno = signal->theData[1]; - UintR TrefuseReason = signal->theData[2]; - Uint32 candidate = signal->theData[3]; + CmRegRef* ref = (CmRegRef*)signal->getDataPtr(); + UintR TaddNodeno = ref->nodeId; + UintR TrefuseReason = ref->errorCode; + Uint32 candidate = ref->presidentCandidate; + Uint32 node_gci = 1; + Uint32 candidate_gci = 1; + Uint32 
start_type = ~0; + NdbNodeBitmask skip_nodes; DEBUG_START3(signal, TrefuseReason); - c_regReqReqRecv++; + if (signal->getLength() == CmRegRef::SignalLength) + { + jam(); + node_gci = ref->latest_gci; + candidate_gci = ref->candidate_latest_gci; + start_type = ref->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes); + } + + c_start.m_regReqReqRecv++; // Ignore block reference in data[0] - if(candidate != cpresidentCandidate){ + if(candidate != c_start.m_president_candidate) + { jam(); - c_regReqReqRecv = ~0; + c_start.m_regReqReqRecv = ~0; } - + + c_start.m_starting_nodes.set(TaddNodeno); + if (node_gci) + { + jam(); + c_start.m_starting_nodes_w_log.set(TaddNodeno); + } + + skip_nodes.bitAND(c_definedNodes); + c_start.m_skip_nodes.bitOR(skip_nodes); + + char buf[100]; switch (TrefuseReason) { case CmRegRef::ZINCOMPATIBLE_VERSION: jam(); - systemErrorLab(signal, __LINE__, "incompatible version, connection refused by running ndb node"); + systemErrorLab(signal, __LINE__, + "incompatible version, " + "connection refused by running ndb node"); + case CmRegRef::ZINCOMPATIBLE_START_TYPE: + jam(); + BaseString::snprintf(buf, sizeof(buf), + "incompatible start type detected: node %d" + " reports %s(%d) my start type: %s(%d)", + TaddNodeno, + get_start_type_string(start_type), start_type, + get_start_type_string(c_start.m_start_type), + c_start.m_start_type); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); break; case CmRegRef::ZBUSY: case CmRegRef::ZBUSY_TO_PRES: @@ -880,22 +1087,26 @@ void Qmgr::execCM_REGREF(Signal* signal) break; case CmRegRef::ZNOT_IN_CFG: jam(); - progError(__LINE__, ERR_NODE_NOT_IN_CONFIG); + progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG); break; case CmRegRef::ZNOT_DEAD: jam(); - progError(__LINE__, ERR_NODE_NOT_DEAD); + progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD); break; case CmRegRef::ZELECTION: jam(); - if (cpresidentCandidate > TaddNodeno) { + if (candidate_gci > c_start.m_president_candidate_gci || + 
(candidate_gci == c_start.m_president_candidate_gci && + candidate < c_start.m_president_candidate)) + { jam(); //---------------------------------------- /* We may already have a candidate */ /* choose the lowest nodeno */ //---------------------------------------- signal->theData[3] = 2; - cpresidentCandidate = TaddNodeno; + c_start.m_president_candidate = candidate; + c_start.m_president_candidate_gci = candidate_gci; } else { signal->theData[3] = 4; }//if @@ -915,7 +1126,7 @@ void Qmgr::execCM_REGREF(Signal* signal) // Send this as an EVENT REPORT to inform about hearing about // other NDB node proclaiming not to be president. /*--------------------------------------------------------------*/ - signal->theData[0] = EventReport::CM_REGREF; + signal->theData[0] = NDB_LE_CM_REGREF; signal->theData[1] = getOwnNodeId(); signal->theData[2] = TaddNodeno; //----------------------------------------- @@ -923,32 +1134,34 @@ void Qmgr::execCM_REGREF(Signal* signal) //----------------------------------------- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - if(cpresidentAlive == ZTRUE){ + if(cpresidentAlive == ZTRUE) + { jam(); - DEBUG(""); + DEBUG("cpresidentAlive"); return; } - if(c_regReqReqSent != c_regReqReqRecv){ + if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv) + { jam(); - DEBUG( c_regReqReqSent << " != " << c_regReqReqRecv); + DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv); return; } - if(cpresidentCandidate != getOwnNodeId()){ + if(c_start.m_president_candidate != getOwnNodeId()) + { jam(); - DEBUG(""); + DEBUG("i'm not the candidate"); return; } - + /** - * All configured nodes has agreed + * All connected nodes has agreed */ - Uint64 now = NdbTick_CurrentMillisecond(); - if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){ + if(check_startup(signal)) + { jam(); electionWon(signal); - sendSttorryLab(signal); /** * Start timer handling @@ -960,6 +1173,191 @@ void Qmgr::execCM_REGREF(Signal* signal) return; 
}//Qmgr::execCM_REGREF() +Uint32 +Qmgr::check_startup(Signal* signal) +{ + Uint64 now = NdbTick_CurrentMillisecond(); + Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout; + Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout; + + /** + * First see if we should wait more... + */ + NdbNodeBitmask tmp; + tmp.bitOR(c_start.m_skip_nodes); + tmp.bitOR(c_start.m_starting_nodes); + + NdbNodeBitmask wait; + wait.assign(c_definedNodes); + wait.bitANDC(tmp); + + Uint32 retVal = 0; + NdbNodeBitmask report_mask; + + if ((c_start.m_latest_gci == 0) || + (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START))) + { + if (!tmp.equal(c_definedNodes)) + { + jam(); + signal->theData[1] = 1; + signal->theData[2] = ~0; + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + else + { + jam(); + signal->theData[1] = 0x8000; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + { + const bool all = c_start.m_starting_nodes.equal(c_definedNodes); + CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; + + { + /** + * Check for missing node group directly + */ + char buf[100]; + NdbNodeBitmask check; + check.assign(c_definedNodes); + check.bitANDC(c_start.m_starting_nodes); // Not connected nodes + check.bitOR(c_start.m_starting_nodes_w_log); + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = check; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + if (sd->output == CheckNodeGroups::Lose) + { + jam(); + goto missing_nodegroup; + } + } + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result = sd->output; + + sd->blockRef = reference(); + 
sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes_w_log; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result_w_log = sd->output; + + if (tmp.equal(c_definedNodes)) + { + /** + * All nodes (wrt no-wait nodes) has connected... + * this means that we will now start or die + */ + jam(); + switch(result_w_log){ + case CheckNodeGroups::Lose: + { + jam(); + goto missing_nodegroup; + } + case CheckNodeGroups::Win: + signal->theData[1] = all ? 0x8001 : 0x8002; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + case CheckNodeGroups::Partitioning: + ndbrequire(result != CheckNodeGroups::Lose); + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + + if (now < partial_timeout) + { + jam(); + signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; + signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + + /** + * Start partial has passed...check for partitioning... + */ + switch(result_w_log){ + case CheckNodeGroups::Lose: + jam(); + goto missing_nodegroup; + case CheckNodeGroups::Partitioning: + if (now < partitioned_timeout && result != CheckNodeGroups::Win) + { + signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + } + // Fall through... + case CheckNodeGroups::Win: + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 
0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + ndbrequire(false); + +start_report: + jam(); + { + Uint32 sz = NdbNodeBitmask::Size; + signal->theData[0] = NDB_LE_StartReport; + signal->theData[3] = sz; + Uint32* ptr = signal->theData+4; + c_definedNodes.copyto(sz, ptr); ptr += sz; + c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz; + c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz; + report_mask.copyto(sz, ptr); ptr+= sz; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, + 4+4*NdbNodeBitmask::Size, JBB); + } + return retVal; + +missing_nodegroup: + jam(); + char buf[100], mask1[100], mask2[100]; + c_start.m_starting_nodes.getText(mask1); + tmp.assign(c_start.m_starting_nodes); + tmp.bitANDC(c_start.m_starting_nodes_w_log); + tmp.getText(mask2); + BaseString::snprintf(buf, sizeof(buf), + "Unable to start missing node group! " + " starting: %s (missing fs for: %s)", + mask1, mask2); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); +} + void Qmgr::electionWon(Signal* signal){ NodeRecPtr myNodePtr; @@ -978,14 +1376,21 @@ Qmgr::electionWon(Signal* signal){ c_clusterNodes.set(getOwnNodeId()); cpresidentAlive = ZTRUE; - c_stopElectionTime = ~0; + c_start_election_time = ~0; c_start.reset(); - signal->theData[0] = EventReport::CM_REGCONF; + signal->theData[0] = NDB_LE_CM_REGCONF; signal->theData[1] = getOwnNodeId(); signal->theData[2] = cpresident; signal->theData[3] = 1; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); + + c_start.m_starting_nodes.clear(getOwnNodeId()); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } } /* @@ -999,7 +1404,14 @@ Qmgr::electionWon(Signal* signal){ /*--------------------------------------------------------------*/ void Qmgr::regreqTimeLimitLab(Signal* signal) { - if(cpresident == ZNIL){ + if(cpresident == ZNIL) + { + if (c_start.m_president_candidate == ZNIL) + { + jam(); + 
c_start.m_president_candidate = getOwnNodeId(); + } + cmInfoconf010Lab(signal); } }//Qmgr::regreqTimelimitLab() @@ -1239,7 +1651,7 @@ void Qmgr::execCM_ADD(Signal* signal) jam(); ndbrequire(addNodePtr.p->phase == ZSTARTING); addNodePtr.p->phase = ZRUNNING; - addNodePtr.p->alarmCount = 0; + setNodeInfo(addNodePtr.i).m_heartbeat_cnt= 0; c_clusterNodes.set(addNodePtr.i); findNeighbours(signal); @@ -1277,7 +1689,7 @@ Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){ * NODES IN THE CLUSTER. */ nodePtr.p->phase = ZRUNNING; - nodePtr.p->alarmCount = 0; + setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0; findNeighbours(signal); c_clusterNodes.set(nodePtr.i); c_start.reset(); @@ -1409,6 +1821,17 @@ void Qmgr::execCM_ACKADD(Signal* signal) */ handleArbitNdbAdd(signal, addNodePtr.i); c_start.reset(); + + if (c_start.m_starting_nodes.get(addNodePtr.i)) + { + jam(); + c_start.m_starting_nodes.clear(addNodePtr.i); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } + } return; }//switch ndbrequire(false); @@ -1498,11 +1921,11 @@ void Qmgr::findNeighbours(Signal* signal) *---------------------------------------------------------------------*/ fnNodePtr.i = cneighbourl; ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec); - fnNodePtr.p->alarmCount = 0; + setNodeInfo(fnNodePtr.i).m_heartbeat_cnt= 0; }//if }//if - signal->theData[0] = EventReport::FIND_NEIGHBOURS; + signal->theData[0] = NDB_LE_FIND_NEIGHBOURS; signal->theData[1] = getOwnNodeId(); signal->theData[2] = cneighbourl; signal->theData[3] = cneighbourh; @@ -1546,8 +1969,8 @@ void Qmgr::initData(Signal* signal) } else { nodePtr.p->phase = ZAPI_INACTIVE; } - - nodePtr.p->alarmCount = 0; + + setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0; nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE; nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE; nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE; @@ -1562,7 +1985,8 @@ void Qmgr::initData(Signal* signal) cnoPrepFailedNodes = 0; creadyDistCom = ZFALSE; cpresident = 
ZNIL; - cpresidentCandidate = ZNIL; + c_start.m_president_candidate = ZNIL; + c_start.m_president_candidate_gci = 0; cpdistref = 0; cneighbourh = ZNIL; cneighbourl = ZNIL; @@ -1590,15 +2014,33 @@ void Qmgr::initData(Signal* signal) Uint32 hbDBAPI = 1500; Uint32 arbitTimeout = 1000; c_restartPartialTimeout = 30000; + c_restartPartionedTimeout = 60000; + c_restartFailureTimeout = ~0; ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB); ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI); ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout); ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT, &c_restartPartialTimeout); - if(c_restartPartialTimeout == 0){ + ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT, + &c_restartPartionedTimeout); + ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT, + &c_restartFailureTimeout); + + if(c_restartPartialTimeout == 0) + { c_restartPartialTimeout = ~0; } + if (c_restartPartionedTimeout ==0) + { + c_restartPartionedTimeout = ~0; + } + + if (c_restartFailureTimeout == 0) + { + c_restartFailureTimeout = ~0; + } + setHbDelay(hbDBDB); setHbApiDelay(hbDBAPI); setArbitTimeout(arbitTimeout); @@ -1728,7 +2170,7 @@ void Qmgr::sendHeartbeat(Signal* signal) sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA); #ifdef VM_TRACE - signal->theData[0] = EventReport::SentHeartbeat; + signal->theData[0] = NDB_LE_SentHeartbeat; signal->theData[1] = localNodePtr.i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); #endif @@ -1749,24 +2191,24 @@ void Qmgr::checkHeartbeat(Signal* signal) }//if ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); - nodePtr.p->alarmCount ++; + setNodeInfo(nodePtr.i).m_heartbeat_cnt++; ndbrequire(nodePtr.p->phase == ZRUNNING); ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB); - if(nodePtr.p->alarmCount > 2){ - signal->theData[0] = EventReport::MissedHeartbeat; + if(getNodeInfo(nodePtr.i).m_heartbeat_cnt > 2){ + signal->theData[0] = 
NDB_LE_MissedHeartbeat; signal->theData[1] = nodePtr.i; - signal->theData[2] = nodePtr.p->alarmCount - 1; + signal->theData[2] = getNodeInfo(nodePtr.i).m_heartbeat_cnt - 1; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); } - if (nodePtr.p->alarmCount > 4) { + if (getNodeInfo(nodePtr.i).m_heartbeat_cnt > 4) { jam(); /**---------------------------------------------------------------------- * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT * PERIODS. THUS WE DECLARE HIM DOWN. *----------------------------------------------------------------------*/ - signal->theData[0] = EventReport::DeadDueToHeartbeat; + signal->theData[0] = NDB_LE_DeadDueToHeartbeat; signal->theData[1] = nodePtr.i; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -1792,16 +2234,16 @@ void Qmgr::apiHbHandlingLab(Signal* signal) if (TnodePtr.p->phase == ZAPI_ACTIVE){ jam(); - TnodePtr.p->alarmCount ++; + setNodeInfo(TnodePtr.i).m_heartbeat_cnt++; - if(TnodePtr.p->alarmCount > 2){ - signal->theData[0] = EventReport::MissedHeartbeat; + if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2){ + signal->theData[0] = NDB_LE_MissedHeartbeat; signal->theData[1] = nodeId; - signal->theData[2] = TnodePtr.p->alarmCount - 1; + signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); } - if (TnodePtr.p->alarmCount > 4) { + if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) { jam(); /*------------------------------------------------------------------*/ /* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS. 
@@ -1810,7 +2252,7 @@ void Qmgr::apiHbHandlingLab(Signal* signal) /*------------------------------------------------------------------*/ /* We call node_failed to release all connections for this api node */ /*------------------------------------------------------------------*/ - signal->theData[0] = EventReport::DeadDueToHeartbeat; + signal->theData[0] = NDB_LE_DeadDueToHeartbeat; signal->theData[1] = nodeId; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); @@ -1833,16 +2275,17 @@ void Qmgr::checkStartInterface(Signal* signal) ptrAss(nodePtr, nodeRec); if (nodePtr.p->phase == ZFAIL_CLOSING) { jam(); - nodePtr.p->alarmCount = nodePtr.p->alarmCount + 1; + setNodeInfo(nodePtr.i).m_heartbeat_cnt++; if (c_connectedNodes.get(nodePtr.i)){ jam(); /*-------------------------------------------------------------------*/ // We need to ensure that the connection is not restored until it has // been disconnected for at least three seconds. /*-------------------------------------------------------------------*/ - nodePtr.p->alarmCount = 0; + setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0; }//if - if ((nodePtr.p->alarmCount > 3) && (nodePtr.p->failState == NORMAL)) { + if ((getNodeInfo(nodePtr.i).m_heartbeat_cnt > 3) + && (nodePtr.p->failState == NORMAL)) { /**------------------------------------------------------------------ * WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO * CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE. @@ -1858,18 +2301,18 @@ void Qmgr::checkStartInterface(Signal* signal) nodePtr.p->phase = ZINIT; }//if - nodePtr.p->alarmCount = 0; + setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0; signal->theData[0] = 0; signal->theData[1] = nodePtr.i; sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA); } else { - if(((nodePtr.p->alarmCount + 1) % 60) == 0){ + if(((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 60) == 0){ char buf[100]; BaseString::snprintf(buf, sizeof(buf), "Failure handling of node %d has not completed in %d min." 
" - state = %d", nodePtr.i, - (nodePtr.p->alarmCount + 1)/60, + (getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1)/60, nodePtr.p->failState); warningEvent(buf); } @@ -1899,7 +2342,6 @@ void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo) failedNodePtr.p->failState = WAITING_FOR_FAILCONF1; sendSignal(DBTC_REF, GSN_API_FAILREQ, signal, 2, JBA); sendSignal(DBDICT_REF, GSN_API_FAILREQ, signal, 2, JBA); - sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA); /**------------------------------------------------------------------------- * THE OTHER NODE WAS AN API NODE. THE COMMUNICATION LINK IS ALREADY @@ -1907,7 +2349,7 @@ void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo) * WE ONLY NEED TO SET PARAMETERS TO ENABLE A NEW CONNECTION IN A FEW * SECONDS. *-------------------------------------------------------------------------*/ - failedNodePtr.p->alarmCount = 0; + setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0; CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0]; @@ -2015,31 +2457,52 @@ void Qmgr::execNDB_FAILCONF(Signal* signal) /*******************************/ /* DISCONNECT_REP */ /*******************************/ +const char *lookupConnectionError(Uint32 err); + void Qmgr::execDISCONNECT_REP(Signal* signal) { jamEntry(); const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0]; const Uint32 nodeId = rep->nodeId; + const Uint32 err = rep->err; c_connectedNodes.clear(nodeId); c_readnodes_nodes.clear(nodeId); NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); + + char buf[100]; + if (getNodeInfo(nodeId).getType() == NodeInfo::DB && + getNodeState().startLevel < NodeState::SL_STARTED) + { + jam(); + CRASH_INSERTION(932); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); + ndbrequire(false); + } + switch(nodePtr.p->phase){ case ZRUNNING: jam(); break; case ZINIT: + ndbrequire(false); case ZSTARTING: + 
progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED, + lookupConnectionError(err)); + ndbrequire(false); case ZPREPARE_FAIL: + ndbrequire(false); case ZFAIL_CLOSING: + ndbrequire(false); case ZAPI_ACTIVE: + ndbrequire(false); case ZAPI_INACTIVE: { - char buf[100]; - BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); - progError(__LINE__, ERR_SR_OTHERNODEFAILED, buf); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } } @@ -2081,7 +2544,7 @@ void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode) /*---------------------------------------------------------------------*/ failedNodePtr.p->failState = NORMAL; failedNodePtr.p->phase = ZFAIL_CLOSING; - failedNodePtr.p->alarmCount = 0; + setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0; CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0]; @@ -2175,8 +2638,8 @@ void Qmgr::execAPI_REGREQ(Signal* signal) } setNodeInfo(apiNodePtr.i).m_version = version; - - apiNodePtr.p->alarmCount = 0; + + setNodeInfo(apiNodePtr.i).m_heartbeat_cnt= 0; ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0]; apiRegConf->qmgrRef = reference(); @@ -2217,29 +2680,32 @@ void Qmgr::execAPI_REGREQ(Signal* signal) }//Qmgr::execAPI_REGREQ() -void +void Qmgr::execAPI_VERSION_REQ(Signal * signal) { jamEntry(); ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr(); - + Uint32 senderRef = req->senderRef; Uint32 nodeId = req->nodeId; ApiVersionConf * conf = (ApiVersionConf *)req; if(getNodeInfo(nodeId).m_connected) + { conf->version = getNodeInfo(nodeId).m_version; + struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId); + conf->inet_addr= in.s_addr; + } else + { conf->version = 0; + conf->inet_addr= 0; + } conf->nodeId = nodeId; - struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId); - conf->inet_addr= in.s_addr; - sendSignal(senderRef, + sendSignal(senderRef, 
GSN_API_VERSION_CONF, signal, ApiVersionConf::SignalLength, JBB); - - } @@ -2330,7 +2796,7 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, break; case FailRep::ZPARTITIONED_CLUSTER: { - code = ERR_ARBIT_SHUTDOWN; + code = NDBD_EXIT_PARTITIONED_SHUTDOWN; char buf1[100], buf2[100]; c_clusterNodes.getText(buf1); if (signal->getLength()== FailRep::SignalLength + FailRep::ExtraLength && @@ -2341,20 +2807,23 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, part.assign(NdbNodeBitmask::Size, rep->partition); part.getText(buf2); BaseString::snprintf(extra, sizeof(extra), - "Partitioned cluster!" - " Our cluster: %s other cluster: %s", + "Our cluster: %s other cluster: %s", buf1, buf2); } else { jam(); BaseString::snprintf(extra, sizeof(extra), - "Partitioned cluster!" - " Our cluster: %s ", buf1); + "Our cluster: %s", buf1); } msg = extra; break; } + case FailRep::ZMULTI_NODE_SHUTDOWN: + msg = "Multi node shutdown"; + break; + default: + msg = "<UNKNOWN>"; } CRASH_INSERTION(932); @@ -2773,7 +3242,7 @@ void Qmgr::execCOMMIT_FAILREQ(Signal* signal) ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); nodePtr.p->phase = ZFAIL_CLOSING; nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF; - nodePtr.p->alarmCount = 0; + setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0; c_clusterNodes.clear(nodePtr.i); }//for /*----------------------------------------------------------------------*/ @@ -2975,7 +3444,7 @@ void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line, "Node was shutdown during startup because node %d failed", failedNodeId); - progError(line, ERR_SR_OTHERNODEFAILED, buf); + progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf); } @@ -2987,7 +3456,7 @@ void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message) // If it's known why shutdown occured // an error message has been passed to this function - progError(line, 0, message); + progError(line, NDBD_EXIT_NDBREQUIRE, message); return; }//Qmgr::systemErrorLab() @@ -3031,7 
+3500,7 @@ void Qmgr::failReport(Signal* signal, failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE; failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE; failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE; - failedNodePtr.p->alarmCount = 0; + setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0; if (aSendFailRep == ZTRUE) { jam(); if (failedNodePtr.i != getOwnNodeId()) { @@ -3356,7 +3825,7 @@ Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId) jam(); return; } - reportArbitEvent(signal, EventReport::ArbitState); + reportArbitEvent(signal, NDB_LE_ArbitState); arbitRec.node = 0; switch (arbitRec.state) { case ARBIT_NULL: // should not happen @@ -3527,7 +3996,7 @@ Qmgr::handleArbitCheck(Signal* signal) arbitRec.newstate = true; break; } - reportArbitEvent(signal, EventReport::ArbitResult); + reportArbitEvent(signal, NDB_LE_ArbitResult); switch (arbitRec.state) { default: jam(); @@ -3554,7 +4023,7 @@ Qmgr::startArbitThread(Signal* signal) jam(); ndbrequire(cpresident == getOwnNodeId()); arbitRec.code = ArbitCode::ThreadStart; - reportArbitEvent(signal, EventReport::ArbitState); + reportArbitEvent(signal, NDB_LE_ArbitState); signal->theData[1] = ++arbitRec.thread; runArbitThread(signal); } @@ -3798,7 +4267,7 @@ Qmgr::execARBIT_PREPREQ(Signal* signal) arbitRec.node = sd->node; arbitRec.ticket = sd->ticket; arbitRec.code = sd->code; - reportArbitEvent(signal, EventReport::ArbitState); + reportArbitEvent(signal, NDB_LE_ArbitState); arbitRec.state = ARBIT_RUN; arbitRec.newstate = true; if (sd->code == ArbitCode::PrepAtrun) { @@ -3885,7 +4354,7 @@ Qmgr::stateArbitStart(Signal* signal) } if (arbitRec.recvCount) { jam(); - reportArbitEvent(signal, EventReport::ArbitState); + reportArbitEvent(signal, NDB_LE_ArbitState); if (arbitRec.code == ArbitCode::ApiStart) { jam(); arbitRec.state = ARBIT_RUN; @@ -3899,7 +4368,7 @@ Qmgr::stateArbitStart(Signal* signal) if (arbitRec.getTimediff() > getArbitTimeout()) { jam(); arbitRec.code = ArbitCode::ErrTimeout; - 
reportArbitEvent(signal, EventReport::ArbitState); + reportArbitEvent(signal, NDB_LE_ArbitState); arbitRec.state = ARBIT_INIT; arbitRec.newstate = true; return; @@ -4006,7 +4475,7 @@ Qmgr::stateArbitChoose(Signal* signal) } if (arbitRec.recvCount) { jam(); - reportArbitEvent(signal, EventReport::ArbitResult); + reportArbitEvent(signal, NDB_LE_ArbitResult); if (arbitRec.code == ArbitCode::WinChoose) { jam(); sendCommitFailReq(signal); // start commit of failed nodes @@ -4022,7 +4491,7 @@ Qmgr::stateArbitChoose(Signal* signal) if (arbitRec.getTimediff() > getArbitTimeout()) { jam(); arbitRec.code = ArbitCode::ErrTimeout; - reportArbitEvent(signal, EventReport::ArbitState); + reportArbitEvent(signal, NDB_LE_ArbitState); arbitRec.state = ARBIT_CRASH; arbitRec.newstate = true; stateArbitCrash(signal); // do it at once @@ -4083,8 +4552,8 @@ Qmgr::stateArbitCrash(Signal* signal) return; #endif CRASH_INSERTION(932); - - progError(__LINE__, ERR_ARBIT_SHUTDOWN, "Arbitrator decided to shutdown this node"); + progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN, + "Arbitrator decided to shutdown this node"); } /** @@ -4125,7 +4594,7 @@ Qmgr::computeArbitNdbMask(NodeBitmask& aMask) * where sender (word 0) is event type. 
*/ void -Qmgr::reportArbitEvent(Signal* signal, EventReport::EventType type) +Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type) { ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0]; sd->sender = type; @@ -4146,8 +4615,10 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal) case 1: infoEvent("creadyDistCom = %d, cpresident = %d\n", creadyDistCom, cpresident); - infoEvent("cpresidentAlive = %d, cpresidentCand = %d\n", - cpresidentAlive, cpresidentCandidate); + infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n", + cpresidentAlive, + c_start.m_president_candidate, + c_start.m_president_candidate_gci); infoEvent("ctoStatus = %d\n", ctoStatus); for(Uint32 i = 1; i<MAX_NDB_NODES; i++){ if(getNodeInfo(i).getType() == NodeInfo::DB){ diff --git a/ndb/src/kernel/blocks/suma/Suma.cpp b/ndb/src/kernel/blocks/suma/Suma.cpp index 84a59f440d9..449436331e4 100644 --- a/ndb/src/kernel/blocks/suma/Suma.cpp +++ b/ndb/src/kernel/blocks/suma/Suma.cpp @@ -50,6 +50,17 @@ //#define EVENT_DEBUG //#define EVENT_PH3_DEBUG //#define EVENT_DEBUG2 +#if 0 +#undef DBUG_ENTER +#undef DBUG_PRINT +#undef DBUG_RETURN +#undef DBUG_VOID_RETURN + +#define DBUG_ENTER(a) {ndbout_c("%s:%d >%s", __FILE__, __LINE__, a);} +#define DBUG_PRINT(a,b) {ndbout << __FILE__ << ":" << __LINE__ << " " << a << ": "; ndbout_c b ;} +#define DBUG_RETURN(a) { ndbout_c("%s:%d <", __FILE__, __LINE__); return(a); } +#define DBUG_VOID_RETURN { ndbout_c("%s:%d <", __FILE__, __LINE__); return; } +#endif /** * @todo: @@ -71,61 +82,73 @@ static const Uint32 SUMA_SEQUENCE = 0xBABEBABE; #define PRINT_ONLY 0 static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE; -void -Suma::getNodeGroupMembers(Signal* signal) { - jam(); +void +Suma::execREAD_CONFIG_REQ(Signal* signal) +{ + jamEntry(); + + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + 
theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + // SumaParticipant + Uint32 noTables; + ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES, + &noTables); + /** - * Ask DIH for nodeGroupMembers + * @todo: fix pool sizes */ - CheckNodeGroups * sd = (CheckNodeGroups*)signal->getDataPtrSend(); - sd->blockRef = reference(); - sd->requestType = - CheckNodeGroups::Direct | - CheckNodeGroups::GetNodeGroupMembers; - sd->nodeId = getOwnNodeId(); - EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, - CheckNodeGroups::SignalLength); - jamEntry(); + c_tablePool_.setSize(noTables); + c_tables.setSize(noTables); - c_nodeGroup = sd->output; - c_noNodesInGroup = 0; - for (int i = 0; i < MAX_NDB_NODES; i++) { - if (sd->mask.get(i)) { - if (i == getOwnNodeId()) c_idInNodeGroup = c_noNodesInGroup; - c_nodesInGroup[c_noNodesInGroup] = i; - c_noNodesInGroup++; - } + c_subscriptions.setSize(20); //10 + c_subscriberPool.setSize(64); + + c_subscriptionPool.setSize(64); //2 + c_syncPool.setSize(20); //2 + c_dataBufferPool.setSize(128); + + { + SLList<SyncRecord> tmp(c_syncPool); + Ptr<SyncRecord> ptr; + while(tmp.seize(ptr)) + new (ptr.p) SyncRecord(* this, c_dataBufferPool); + tmp.release(); } - // ndbout_c("c_noNodesInGroup=%d", c_noNodesInGroup); - ndbrequire(c_noNodesInGroup > 0); // at least 1 node in the nodegroup + // Suma + c_nodePool.setSize(MAX_NDB_NODES); + c_masterNodeId = getOwnNodeId(); -#ifdef NODEFAIL_DEBUG - for (Uint32 i = 0; i < c_noNodesInGroup; i++) { - ndbout_c ("Suma: NodeGroup %u, me %u, me in group %u, member[%u] %u", - c_nodeGroup, getOwnNodeId(), c_idInNodeGroup, - i, c_nodesInGroup[i]); + c_nodeGroup = c_noNodesInGroup = c_idInNodeGroup = 0; + for (int i = 0; i < MAX_REPLICAS; i++) { + c_nodesInGroup[i] = 0; } -#endif + + c_subCoordinatorPool.setSize(10); + + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + 
ReadConfigConf::SignalLength, JBB); } void Suma::execSTTOR(Signal* signal) { jamEntry(); - + + DBUG_ENTER("Suma::execSTTOR"); const Uint32 startphase = signal->theData[1]; const Uint32 typeOfStart = signal->theData[7]; -#ifdef NODEFAIL_DEBUG - ndbout_c ("SUMA::execSTTOR startphase = %u, typeOfStart = %u", - startphase, typeOfStart); - -#endif - - if(startphase == 1){ - jam(); - c_restartLock = true; - } + DBUG_PRINT("info",("startphase = %u, typeOfStart = %u", startphase, typeOfStart)); if(startphase == 3){ jam(); @@ -155,67 +178,39 @@ Suma::execSTTOR(Signal* signal) { g_subPtrI = subPtr.i; // sendSTTORRY(signal); #endif - return; + DBUG_VOID_RETURN; } - if(startphase == 5) { - getNodeGroupMembers(signal); - if (g_TypeOfStart == NodeState::ST_NODE_RESTART) { - jam(); - for (Uint32 i = 0; i < c_noNodesInGroup; i++) { - Uint32 ref = calcSumaBlockRef(c_nodesInGroup[i]); - if (ref != reference()) - sendSignal(ref, GSN_SUMA_START_ME, signal, - 1 /*SumaStartMe::SignalLength*/, JBB); - } - } - } - if(startphase == 7) { - c_restartLock = false; // may be set false earlier with HANDOVER_REQ - - if (g_TypeOfStart != NodeState::ST_NODE_RESTART) { - for( int i = 0; i < NO_OF_BUCKETS; i++) { - if (getResponsibleSumaNodeId(i) == refToNode(reference())) { - // I'm running this bucket -#ifdef EVENT_DEBUG - ndbout_c("bucket %u set to true", i); -#endif - c_buckets[i].active = true; - } - } - } - if(g_TypeOfStart == NodeState::ST_INITIAL_START && c_masterNodeId == getOwnNodeId()) { jam(); createSequence(signal); - return; + DBUG_VOID_RETURN; }//if }//if sendSTTORRY(signal); - return; + DBUG_VOID_RETURN; } void Suma::createSequence(Signal* signal) { jam(); + DBUG_ENTER("Suma::createSequence"); UtilSequenceReq * req = (UtilSequenceReq*)signal->getDataPtrSend(); req->senderData = RNIL; req->sequenceId = SUMA_SEQUENCE; req->requestType = UtilSequenceReq::Create; -#ifdef DEBUG_SUMA_SEQUENCE - ndbout_c("SUMA: Create sequence"); -#endif sendSignal(DBUTIL_REF, GSN_UTIL_SEQUENCE_REQ, signal, 
UtilSequenceReq::SignalLength, JBB); // execUTIL_SEQUENCE_CONF will call createSequenceReply() + DBUG_VOID_RETURN; } void @@ -267,40 +262,6 @@ Suma::execREAD_NODESCONF(Signal* signal){ sendSTTORRY(signal); } -#if 0 -void -Suma::execREAD_CONFIG_REQ(Signal* signal) -{ - const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); - Uint32 ref = req->senderRef; - Uint32 senderData = req->senderData; - ndbrequire(req->noOfParameters == 0); - - jamEntry(); - - const ndb_mgm_configuration_iterator * p = - theConfiguration.getOwnConfigIterator(); - ndbrequire(p != 0); - - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_REDOLOG_FILES, - &cnoLogFiles)); - ndbrequire(cnoLogFiles > 0); - - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_FRAG, &cfragrecFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_TABLE, &ctabrecFileSize)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_TC_CONNECT, - &ctcConnectrecFileSize)); - clogFileFileSize = 4 * cnoLogFiles; - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_SCAN, &cscanrecFileSize)); - cmaxAccOps = cscanrecFileSize * MAX_PARALLEL_SCANS_PER_FRAG; - - initRecords(); - initialiseRecordsLab(signal, 0, ref, senderData); - - return; -}//Dblqh::execSIZEALT_REP() -#endif - void Suma::sendSTTORRY(Signal* signal){ signal->theData[0] = 0; @@ -335,269 +296,6 @@ SumaParticipant::execCONTINUEB(Signal* signal) * *****************************************************************************/ -void Suma::execAPI_FAILREQ(Signal* signal) -{ - jamEntry(); - Uint32 failedApiNode = signal->theData[0]; - //BlockReference retRef = signal->theData[1]; - - c_failedApiNodes.set(failedApiNode); - bool found = removeSubscribersOnNode(signal, failedApiNode); - - if(!found){ - jam(); - c_failedApiNodes.clear(failedApiNode); - } -}//execAPI_FAILREQ() - -bool -SumaParticipant::removeSubscribersOnNode(Signal *signal, Uint32 nodeId) -{ - bool found = false; - - SubscriberPtr i_subbPtr; - c_dataSubscribers.first(i_subbPtr); - 
while(!i_subbPtr.isNull()){ - SubscriberPtr subbPtr = i_subbPtr; - c_dataSubscribers.next(i_subbPtr); - jam(); - if (refToNode(subbPtr.p->m_subscriberRef) == nodeId) { - jam(); - c_dataSubscribers.remove(subbPtr); - c_removeDataSubscribers.add(subbPtr); - found = true; - } - } - if(found){ - jam(); - sendSubStopReq(signal); - } - return found; -} - -void -SumaParticipant::sendSubStopReq(Signal *signal){ - static bool remove_lock = false; - jam(); - - if(remove_lock) { - jam(); - return; - } - remove_lock = true; - - SubscriberPtr subbPtr; - c_removeDataSubscribers.first(subbPtr); - if (subbPtr.isNull()){ - jam(); -#if 0 - signal->theData[0] = failedApiNode; - signal->theData[1] = reference(); - sendSignal(retRef, GSN_API_FAILCONF, signal, 2, JBB); -#endif - c_failedApiNodes.clear(); - - remove_lock = false; - return; - } - - SubscriptionPtr subPtr; - c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI); - - SubStopReq * const req = (SubStopReq*)signal->getDataPtrSend(); - req->senderRef = reference(); - req->senderData = subbPtr.i; - req->subscriberRef = subbPtr.p->m_subscriberRef; - req->subscriberData = subbPtr.p->m_subscriberData; - req->subscriptionId = subPtr.p->m_subscriptionId; - req->subscriptionKey = subPtr.p->m_subscriptionKey; - req->part = SubscriptionData::TableData; - - sendSignal(SUMA_REF, GSN_SUB_STOP_REQ, signal, SubStopReq::SignalLength, JBB); -} - -void -SumaParticipant::execSUB_STOP_CONF(Signal* signal){ - jamEntry(); - - SubStopConf * const conf = (SubStopConf*)signal->getDataPtr(); - - // Uint32 subscriberData = conf->subscriberData; - // Uint32 subscriberRef = conf->subscriberRef; - - Subscription key; - key.m_subscriptionId = conf->subscriptionId; - key.m_subscriptionKey = conf->subscriptionKey; - - SubscriptionPtr subPtr; - if(c_subscriptions.find(subPtr, key)) { - jam(); - if (subPtr.p->m_markRemove) { - jam(); - ndbrequire(false); - ndbrequire(subPtr.p->m_nSubscribers > 0); - subPtr.p->m_nSubscribers--; - if (subPtr.p->m_nSubscribers == 
0){ - jam(); - completeSubRemoveReq(signal, subPtr); - } - } - } - - sendSubStopReq(signal); -} - -void -SumaParticipant::execSUB_STOP_REF(Signal* signal){ - jamEntry(); - SubStopRef * const ref = (SubStopRef*)signal->getDataPtr(); - - Uint32 subscriptionId = ref->subscriptionId; - Uint32 subscriptionKey = ref->subscriptionKey; - Uint32 part = ref->part; - Uint32 subscriberData = ref->subscriberData; - Uint32 subscriberRef = ref->subscriberRef; - // Uint32 err = ref->err; - - if(!ref->isTemporary()){ - ndbrequire(false); - } - - SubStopReq * const req = (SubStopReq*)signal->getDataPtrSend(); - req->subscriberRef = subscriberRef; - req->subscriberData = subscriberData; - req->subscriptionId = subscriptionId; - req->subscriptionKey = subscriptionKey; - req->part = part; - - sendSignal(SUMA_REF, GSN_SUB_STOP_REQ, signal, SubStopReq::SignalLength, JBB); -} - -void -Suma::execNODE_FAILREP(Signal* signal){ - jamEntry(); - - NodeFailRep * const rep = (NodeFailRep*)signal->getDataPtr(); - - bool changed = false; - - NodePtr nodePtr; -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma: nodefailrep"); -#endif - c_nodeFailGCI = getFirstGCI(signal); - - for(c_nodes.first(nodePtr); nodePtr.i != RNIL; c_nodes.next(nodePtr)){ - if(NodeBitmask::get(rep->theNodes, nodePtr.p->nodeId)){ - if(nodePtr.p->alive){ - ndbassert(c_aliveNodes.get(nodePtr.p->nodeId)); - changed = true; - jam(); - } else { - ndbassert(!c_aliveNodes.get(nodePtr.p->nodeId)); - jam(); - } - - if (c_preparingNodes.get(nodePtr.p->nodeId)) { - jam(); - // we are currently preparing this node that died - // it's ok just to clear and go back to waiting for it to start up - Restart.resetNode(calcSumaBlockRef(nodePtr.p->nodeId)); - c_preparingNodes.clear(nodePtr.p->nodeId); - } else if (c_handoverToDo) { - jam(); - // TODO what if I'm a SUMA that is currently restarting and the SUMA - // responsible for restarting me is the one that died? 
- - // a node has failed whilst handover is going on - // let's check if we're in the process of handover with that node - c_handoverToDo = false; - for( int i = 0; i < NO_OF_BUCKETS; i++) { - if (c_buckets[i].handover) { - // I'm doing handover, but is it with the dead node? - if (getResponsibleSumaNodeId(i) == nodePtr.p->nodeId) { - // so it was the dead node, has handover started? - if (c_buckets[i].handover_started) { - jam(); - // we're not ok and will have lost data! - // set not active to indicate this - - // this will generate takeover behaviour - c_buckets[i].active = false; - c_buckets[i].handover_started = false; - } // else we're ok to revert back to state before - c_buckets[i].handover = false; - } else { - jam(); - // ok, we're doing handover with a different node - c_handoverToDo = true; - } - } - } - } - - c_failoverBuffer.nodeFailRep(); - - nodePtr.p->alive = 0; - c_aliveNodes.clear(nodePtr.p->nodeId); // this has to be done after the loop above - } - } -} - -void -Suma::execINCL_NODEREQ(Signal* signal){ - jamEntry(); - - //const Uint32 senderRef = signal->theData[0]; - const Uint32 inclNode = signal->theData[1]; - - NodePtr node; - for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)){ - jam(); - const Uint32 nodeId = node.p->nodeId; - if(inclNode == nodeId){ - jam(); - - ndbrequire(node.p->alive == 0); - ndbrequire(!c_aliveNodes.get(nodeId)); - - for (Uint32 j = 0; j < c_noNodesInGroup; j++) { - jam(); - if (c_nodesInGroup[j] == nodeId) { - // the starting node is part of my node group - jam(); - c_preparingNodes.set(nodeId); // set as being prepared - for (Uint32 i = 0; i < c_noNodesInGroup; i++) { - jam(); - if (i == c_idInNodeGroup) { - jam(); - // I'm responsible for restarting this SUMA - // ALL dict's should have meta data info so it is ok to start - Restart.startNode(signal, calcSumaBlockRef(nodeId)); - break; - }//if - if (c_aliveNodes.get(c_nodesInGroup[i])) { - jam(); - break; // another Suma takes care of this - }//if - }//for - 
break; - }//if - }//for - - node.p->alive = 1; - c_aliveNodes.set(nodeId); - - break; - }//if - }//for - -#if 0 // if we include this DIH's got to be prepared, later if needed... - signal->theData[0] = reference(); - - sendSignal(senderRef, GSN_INCL_NODECONF, signal, 1, JBB); -#endif -} - void Suma::execSIGNAL_DROPPED_REP(Signal* signal){ jamEntry(); @@ -610,6 +308,19 @@ Suma::execSIGNAL_DROPPED_REP(Signal* signal){ * */ +static unsigned +count_subscribers(const DLList<SumaParticipant::Subscriber> &subs) +{ + unsigned n= 0; + SumaParticipant::SubscriberPtr i_subbPtr; + subs.first(i_subbPtr); + while(!i_subbPtr.isNull()){ + n++; + subs.next(i_subbPtr); + } + return n; +} + void Suma::execDUMP_STATE_ORD(Signal* signal){ jamEntry(); @@ -630,10 +341,6 @@ Suma::execDUMP_STATE_ORD(Signal* signal){ syncPtr.p->startScan(signal); } - if(tCase == 8002){ - syncPtr.p->startTrigger(signal); - } - if(tCase == 8003){ subPtr.p->m_subscriptionType = SubCreateReq::SingleTableScan; LocalDataBuffer<15> attrs(c_dataBufferPool, syncPtr.p->m_attributeList); @@ -664,6 +371,15 @@ Suma::execDUMP_STATE_ORD(Signal* signal){ infoEvent("Suma: c_dataBufferPool size: %d free: %d", c_dataBufferPool.getSize(), c_dataBufferPool.getNoOfFree()); + + infoEvent("Suma: c_metaSubscribers count: %d", + count_subscribers(c_metaSubscribers)); + infoEvent("Suma: c_dataSubscribers count: %d", + count_subscribers(c_dataSubscribers)); + infoEvent("Suma: c_prepDataSubscribers count: %d", + count_subscribers(c_prepDataSubscribers)); + infoEvent("Suma: c_removeDataSubscribers count: %d", + count_subscribers(c_removeDataSubscribers)); } } @@ -812,16 +528,14 @@ Suma::execUTIL_SEQUENCE_CONF(Signal* signal) { jamEntry(); + DBUG_ENTER("Suma::execUTIL_SEQUENCE_CONF"); CRASH_INSERTION(13002); UtilSequenceConf * conf = (UtilSequenceConf*)signal->getDataPtr(); -#ifdef DEBUG_SUMA_SEQUENCE - ndbout_c("SUMA: Create sequence conf"); -#endif if(conf->requestType == UtilSequenceReq::Create) { jam(); createSequenceReply(signal, 
conf, NULL); - return; + DBUG_VOID_RETURN; } Uint64 subId; @@ -841,18 +555,21 @@ Suma::execUTIL_SEQUENCE_CONF(Signal* signal) CreateSubscriptionIdConf::SignalLength, JBB); c_subscriberPool.release(subbPtr); + + DBUG_VOID_RETURN; } void Suma::execUTIL_SEQUENCE_REF(Signal* signal) { jamEntry(); + DBUG_ENTER("Suma::execUTIL_SEQUENCE_REF"); UtilSequenceRef * ref = (UtilSequenceRef*)signal->getDataPtr(); if(ref->requestType == UtilSequenceReq::Create) { jam(); createSequenceReply(signal, NULL, ref); - return; + DBUG_VOID_RETURN; } Uint32 subData = ref->senderData; @@ -861,7 +578,7 @@ Suma::execUTIL_SEQUENCE_REF(Signal* signal) c_subscriberPool.getPtr(subbPtr,subData); sendSubIdRef(signal, GrepError::SEQUENCE_ERROR); c_subscriberPool.release(subbPtr); - return; + DBUG_VOID_RETURN; }//execUTIL_SEQUENCE_REF() @@ -1089,26 +806,6 @@ SumaParticipant::sendSubCreateRef(Signal* signal, const SubCreateReq& req, Uint3 return; } - - - - - - - - - - - -Uint32 -SumaParticipant::getFirstGCI(Signal* signal) { - if (c_lastCompleteGCI == RNIL) { - ndbout_c("WARNING: c_lastCompleteGCI == RNIL"); - return 0; - } - return c_lastCompleteGCI+3; -} - /********************************************************** * * Setting upp trigger for subscription @@ -1154,27 +851,6 @@ SumaParticipant::execSUB_SYNC_REQ(Signal* signal) { case SubscriptionData::MetaData: ok = true; jam(); - if (subPtr.p->m_subscriptionType == SubCreateReq::DatabaseSnapshot) { - TableList::DataBufferIterator it; - syncPtr.p->m_tableList.first(it); - if(it.isNull()) { - /** - * Get all tables from dict - */ - ListTablesReq * req = (ListTablesReq*)signal->getDataPtrSend(); - req->senderRef = reference(); - req->senderData = syncPtr.i; - req->requestData = 0; - /** - * @todo: accomodate scan of index tables? 
- */ - req->setTableType(DictTabInfo::UserTable); - - sendSignal(DBDICT_REF, GSN_LIST_TABLES_REQ, signal, - ListTablesReq::SignalLength, JBB); - break; - } - } syncPtr.p->startMeta(signal); break; @@ -1209,16 +885,6 @@ SumaParticipant::sendSubSyncRef(Signal* signal, Uint32 errCode){ */ void -SumaParticipant::execLIST_TABLES_CONF(Signal* signal){ - jamEntry(); - CRASH_INSERTION(13005); - ListTablesConf* const conf = (ListTablesConf*)signal->getDataPtr(); - SyncRecord* tmp = c_syncPool.getPtr(conf->senderData); - tmp->runLIST_TABLES_CONF(signal); -} - - -void SumaParticipant::execGET_TABINFOREF(Signal* signal){ jamEntry(); GetTabInfoRef* const ref = (GetTabInfoRef*)signal->getDataPtr(); @@ -1426,110 +1092,12 @@ SumaParticipant::execDIGETPRIMCONF(Signal* signal){ tmp->runDIGETPRIMCONF(signal); } -void -SumaParticipant::execCREATE_TRIG_CONF(Signal* signal){ - jamEntry(); - - CRASH_INSERTION(13009); - - CreateTrigConf * const conf = (CreateTrigConf*)signal->getDataPtr(); - - const Uint32 senderData = conf->getConnectionPtr(); - SyncRecord* tmp = c_syncPool.getPtr(senderData); - tmp->runCREATE_TRIG_CONF(signal); - - /** - * dodido - * @todo: I (Johan) dont know what to do here. Jonas, what do you mean? 
- */ -} - -void -SumaParticipant::execCREATE_TRIG_REF(Signal* signal){ - jamEntry(); - ndbrequire(false); -} - -void -SumaParticipant::execDROP_TRIG_CONF(Signal* signal){ - jamEntry(); - - CRASH_INSERTION(13010); - - DropTrigConf * const conf = (DropTrigConf*)signal->getDataPtr(); - - const Uint32 senderData = conf->getConnectionPtr(); - SyncRecord* tmp = c_syncPool.getPtr(senderData); - tmp->runDROP_TRIG_CONF(signal); -} - -void -SumaParticipant::execDROP_TRIG_REF(Signal* signal){ - jamEntry(); - - DropTrigRef * const ref = (DropTrigRef*)signal->getDataPtr(); - - const Uint32 senderData = ref->getConnectionPtr(); - SyncRecord* tmp = c_syncPool.getPtr(senderData); - tmp->runDROP_TRIG_CONF(signal); -} - /************************************************************************* * * */ void -SumaParticipant::SyncRecord::runLIST_TABLES_CONF(Signal* signal){ - jam(); - - ListTablesConf * const conf = (ListTablesConf*)signal->getDataPtr(); - const Uint32 len = signal->length() - ListTablesConf::HeaderLength; - - SubscriptionPtr subPtr; - suma.c_subscriptions.getPtr(subPtr, m_subscriptionPtrI); - - for (unsigned i = 0; i < len; i++) { - subPtr.p->m_maxTables++; - suma.addTableId(ListTablesConf::getTableId(conf->tableData[i]), subPtr, this); - } - - // for (unsigned i = 0; i < len; i++) - // conf->tableData[i] = ListTablesConf::getTableId(conf->tableData[i]); - // m_tableList.append(&conf->tableData[0], len); - -#if 0 - TableList::DataBufferIterator it; - int i = 0; - for(m_tableList.first(it);!it.isNull();m_tableList.next(it)) { - ndbout_c("%u listtableconf tableid %d", i++, *it.data); - } -#endif - - if(len == ListTablesConf::DataLength){ - jam(); - // we expect more LIST_TABLE_CONF - return; - } - -#if 0 - subPtr.p->m_currentTable = 0; - subPtr.p->m_maxTables = 0; - - TableList::DataBufferIterator it; - for(m_tableList.first(it); !it.isNull(); m_tableList.next(it)) { - subPtr.p->m_maxTables++; - suma.addTableId(*it.data, subPtr, NULL); -#ifdef NODEFAIL_DEBUG - 
ndbout_c(" listtableconf tableid %d",*it.data); -#endif - } -#endif - - startMeta(signal); -} - -void SumaParticipant::SyncRecord::startMeta(Signal* signal){ jam(); m_currentTable = 0; @@ -1628,18 +1196,6 @@ SumaParticipant::SyncRecord::runGET_TABINFO_CONF(Signal* signal){ SegmentedSectionPtr ptr; signal->getSection(ptr, GetTabInfoConf::DICT_TAB_INFO); - SubMetaData * data = (SubMetaData*)signal->getDataPtrSend(); - /** - * sending lastCompleteGCI. Used by Lars in interval calculations - * incremenet by one, since last_CompleteGCI is the not the current gci. - */ - data->gci = suma.c_lastCompleteGCI + 1; - data->tableId = tableId; - data->senderData = subPtr.p->m_subscriberData; -#if PRINT_ONLY - ndbout_c("GSN_SUB_META_DATA Table %d", tableId); -#else - bool okToSend = m_doSendSyncData; /* @@ -1669,7 +1225,6 @@ SumaParticipant::SyncRecord::runGET_TABINFO_CONF(Signal* signal){ SubMetaData::SignalLength, JBB); } } -#endif TablePtr tabPtr; ndbrequire(suma.c_tables.find(tabPtr, tableId)); @@ -2045,514 +1600,6 @@ SumaParticipant::execSCAN_HBREP(Signal* signal){ } /********************************************************** - * - * Suma participant interface - * - * Creation of subscriber - * - */ - -void -SumaParticipant::execSUB_START_REQ(Signal* signal){ - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUB_START_REQ"); -#endif - - CRASH_INSERTION(13013); - - if (c_restartLock) { - jam(); - // ndbout_c("c_restartLock"); - if (RtoI(signal->getSendersBlockRef(), false) == RNIL) { - jam(); - sendSubStartRef(signal, /** Error Code */ 0, true); - return; - } - // only allow other Suma's in the nodegroup to come through for restart purposes - } - - Subscription key; - - SubStartReq * const req = (SubStartReq*)signal->getDataPtr(); - - Uint32 senderRef = req->senderRef; - Uint32 senderData = req->senderData; - Uint32 subscriberData = req->subscriberData; - Uint32 subscriberRef = req->subscriberRef; - SubscriptionData::Part part = (SubscriptionData::Part)req->part; - 
key.m_subscriptionId = req->subscriptionId; - key.m_subscriptionKey = req->subscriptionKey; - - SubscriptionPtr subPtr; - if(!c_subscriptions.find(subPtr, key)){ - jam(); - sendSubStartRef(signal, /** Error Code */ 0); - return; - } - - Ptr<SyncRecord> syncPtr; - c_syncPool.getPtr(syncPtr, subPtr.p->m_syncPtrI); - if (syncPtr.p->m_locked) { - jam(); -#if 0 - ndbout_c("Locked"); -#endif - sendSubStartRef(signal, /** Error Code */ 0, true); - return; - } - syncPtr.p->m_locked = true; - - SubscriberPtr subbPtr; - if(!c_subscriberPool.seize(subbPtr)){ - jam(); - syncPtr.p->m_locked = false; - sendSubStartRef(signal, /** Error Code */ 0); - return; - } - - Uint32 type = subPtr.p->m_subscriptionType; - - subbPtr.p->m_senderRef = senderRef; - subbPtr.p->m_senderData = senderData; - - switch (type) { - case SubCreateReq::TableEvent: - jam(); - // we want the data to return to the API not DICT - subbPtr.p->m_subscriberRef = subscriberRef; - // ndbout_c("start ref = %u", signal->getSendersBlockRef()); - // ndbout_c("ref = %u", subbPtr.p->m_subscriberRef); - // we use the subscription id for now, should really be API choice - subbPtr.p->m_subscriberData = subscriberData; - -#if 0 - if (RtoI(signal->getSendersBlockRef(), false) == RNIL) { - jam(); - for (Uint32 i = 0; i < c_noNodesInGroup; i++) { - Uint32 ref = calcSumaBlockRef(c_nodesInGroup[i]); - if (ref != reference()) { - jam(); - sendSubStartReq(subPtr, subbPtr, signal, ref); - } else - jam(); - } - } -#endif - break; - case SubCreateReq::DatabaseSnapshot: - case SubCreateReq::SelectiveTableSnapshot: - jam(); - ndbrequire(false); - //subbPtr.p->m_subscriberRef = GREP_REF; - subbPtr.p->m_subscriberData = subPtr.p->m_subscriberData; - break; - case SubCreateReq::SingleTableScan: - jam(); - subbPtr.p->m_subscriberRef = subPtr.p->m_subscriberRef; - subbPtr.p->m_subscriberData = subPtr.p->m_subscriberData; - } - - subbPtr.p->m_subPtrI = subPtr.i; - subbPtr.p->m_firstGCI = RNIL; - if (type == SubCreateReq::TableEvent) - 
subbPtr.p->m_lastGCI = 0; - else - subbPtr.p->m_lastGCI = RNIL; // disable usage of m_lastGCI - bool ok = false; - - switch(part){ - case SubscriptionData::MetaData: - ok = true; - jam(); - c_metaSubscribers.add(subbPtr); - sendSubStartComplete(signal, subbPtr, 0, part); - break; - case SubscriptionData::TableData: - ok = true; - jam(); - c_prepDataSubscribers.add(subbPtr); - syncPtr.p->startTrigger(signal); - break; - } - ndbrequire(ok); -} - -void -SumaParticipant::sendSubStartComplete(Signal* signal, - SubscriberPtr subbPtr, - Uint32 firstGCI, - SubscriptionData::Part part){ - jam(); - - SubscriptionPtr subPtr; - c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI); - - Ptr<SyncRecord> syncPtr; - c_syncPool.getPtr(syncPtr, subPtr.p->m_syncPtrI); - syncPtr.p->m_locked = false; - - SubStartConf * const conf = (SubStartConf*)signal->getDataPtrSend(); - - conf->senderRef = reference(); - conf->senderData = subbPtr.p->m_senderData; - conf->subscriptionId = subPtr.p->m_subscriptionId; - conf->subscriptionKey = subPtr.p->m_subscriptionKey; - conf->firstGCI = firstGCI; - conf->part = (Uint32) part; - - conf->subscriberData = subPtr.p->m_subscriberData; - sendSignal(subPtr.p->m_subscriberRef, GSN_SUB_START_CONF, signal, - SubStartConf::SignalLength, JBB); -} - -#if 0 -void -SumaParticipant::sendSubStartRef(SubscriptionPtr subPtr, - Signal* signal, Uint32 errCode, - bool temporary){ - jam(); - SubStartRef * ref = (SubStartRef *)signal->getDataPtrSend(); - xxx ref->senderRef = reference(); - xxx ref->senderData = subPtr.p->m_senderData; - ref->subscriptionId = subPtr.p->m_subscriptionId; - ref->subscriptionKey = subPtr.p->m_subscriptionKey; - ref->part = (Uint32) subPtr.p->m_subscriptionType; - ref->subscriberData = subPtr.p->m_subscriberData; - ref->err = errCode; - if (temporary) { - jam(); - ref->setTemporary(); - } - releaseSections(signal); - sendSignal(subPtr.p->m_subscriberRef, GSN_SUB_START_REF, signal, - SubStartRef::SignalLength, JBB); -} -#endif -void 
-SumaParticipant::sendSubStartRef(Signal* signal, Uint32 errCode, - bool temporary){ - jam(); - SubStartRef * ref = (SubStartRef *)signal->getDataPtrSend(); - ref->senderRef = reference(); - ref->err = errCode; - if (temporary) { - jam(); - ref->setTemporary(); - } - releaseSections(signal); - sendSignal(signal->getSendersBlockRef(), GSN_SUB_START_REF, signal, - SubStartRef::SignalLength, JBB); -} - -/********************************************************** - * - * Trigger admin interface - * - */ - -void -SumaParticipant::SyncRecord::startTrigger(Signal* signal){ - jam(); - m_currentTable = 0; - m_latestTriggerId = RNIL; - nextTrigger(signal); -} - -void -SumaParticipant::SyncRecord::nextTrigger(Signal* signal){ - jam(); - - TableList::DataBufferIterator it; - - if(!m_tableList.position(it, m_currentTable)){ - completeTrigger(signal); - return; - } - - SubscriptionPtr subPtr; - suma.c_subscriptions.getPtr(subPtr, m_subscriptionPtrI); - ndbrequire(subPtr.p->m_syncPtrI == ptrI); - const Uint32 RT_BREAK = 48; - Uint32 latestTriggerId = 0; - for(Uint32 i = 0; i<RT_BREAK && !it.isNull(); i++, m_tableList.next(it)){ - TablePtr tabPtr; -#if 0 - ndbout_c("nextTrigger tableid %u", *it.data); -#endif - ndbrequire(suma.c_tables.find(tabPtr, *it.data)); - - AttributeMask attrMask; - createAttributeMask(attrMask, tabPtr.p); - - for(Uint32 j = 0; j<3; j++){ - i++; - latestTriggerId = (tabPtr.p->m_schemaVersion << 18) | - (j << 16) | tabPtr.p->m_tableId; - if(tabPtr.p->m_hasTriggerDefined[j] == 0) { - ndbrequire(tabPtr.p->m_triggerIds[j] == ILLEGAL_TRIGGER_ID); -#if 0 - ndbout_c("DEFINING trigger on table %u[%u]", tabPtr.p->m_tableId, j); -#endif - CreateTrigReq * const req = (CreateTrigReq*)signal->getDataPtrSend(); - req->setUserRef(SUMA_REF); - req->setConnectionPtr(ptrI); - req->setTriggerType(TriggerType::SUBSCRIPTION_BEFORE); - req->setTriggerActionTime(TriggerActionTime::TA_DETACHED); - req->setMonitorReplicas(true); - req->setMonitorAllAttributes(false); - 
req->setReceiverRef(SUMA_REF); - req->setTriggerId(latestTriggerId); - req->setTriggerEvent((TriggerEvent::Value)j); - req->setTableId(tabPtr.p->m_tableId); - req->setAttributeMask(attrMask); - suma.sendSignal(DBTUP_REF, GSN_CREATE_TRIG_REQ, - signal, CreateTrigReq::SignalLength, JBB); - - } else { - /** - * Faking that a trigger has been created in order to - * simulate the proper behaviour. - * Perhaps this should be a dummy signal instead of - * (ab)using CREATE_TRIG_CONF. - */ - CreateTrigConf * conf = (CreateTrigConf*)signal->getDataPtrSend(); - conf->setConnectionPtr(ptrI); - conf->setTableId(tabPtr.p->m_tableId); - conf->setTriggerId(latestTriggerId); - suma.sendSignal(SUMA_REF,GSN_CREATE_TRIG_CONF, - signal, CreateTrigConf::SignalLength, JBB); - - } - - } - m_currentTable++; - } - m_latestTriggerId = latestTriggerId; -} - -void -SumaParticipant::SyncRecord::createAttributeMask(AttributeMask& mask, - Table * table){ - jam(); - mask.clear(); - DataBuffer<15>::DataBufferIterator it; - LocalDataBuffer<15> attrBuf(suma.c_dataBufferPool, table->m_attributes); - for(attrBuf.first(it); !it.curr.isNull(); attrBuf.next(it)){ - mask.set(* it.data); - } -} - -void -SumaParticipant::SyncRecord::runCREATE_TRIG_CONF(Signal* signal){ - jam(); - - CreateTrigConf * const conf = (CreateTrigConf*)signal->getDataPtr(); - const Uint32 triggerId = conf->getTriggerId(); - Uint32 type = (triggerId >> 16) & 0x3; - Uint32 tableId = conf->getTableId(); - - TablePtr tabPtr; - ndbrequire(suma.c_tables.find(tabPtr, tableId)); - - ndbrequire(type < 3); - tabPtr.p->m_triggerIds[type] = triggerId; - tabPtr.p->m_hasTriggerDefined[type]++; - - if(triggerId == m_latestTriggerId){ - jam(); - nextTrigger(signal); - } -} - -void -SumaParticipant::SyncRecord::completeTrigger(Signal* signal){ - jam(); - SubscriptionPtr subPtr; - CRASH_INSERTION(13013); -#ifdef EVENT_PH3_DEBUG - ndbout_c("SumaParticipant: trigger completed"); -#endif - Uint32 gci; - suma.c_subscriptions.getPtr(subPtr, 
m_subscriptionPtrI); - ndbrequire(subPtr.p->m_syncPtrI == ptrI); - - SubscriberPtr subbPtr; - { - bool found = false; - - for(suma.c_prepDataSubscribers.first(subbPtr); - !subbPtr.isNull(); suma.c_prepDataSubscribers.next(subbPtr)) { - jam(); - if(subbPtr.p->m_subPtrI == subPtr.i) { - jam(); - found = true; - break; - } - } - ndbrequire(found); - gci = suma.getFirstGCI(signal); - subbPtr.p->m_firstGCI = gci; - suma.c_prepDataSubscribers.remove(subbPtr); - suma.c_dataSubscribers.add(subbPtr); - } - suma.sendSubStartComplete(signal, subbPtr, gci, SubscriptionData::TableData); -} - -void -SumaParticipant::SyncRecord::startDropTrigger(Signal* signal){ - jam(); - m_currentTable = 0; - m_latestTriggerId = RNIL; - nextDropTrigger(signal); -} - -void -SumaParticipant::SyncRecord::nextDropTrigger(Signal* signal){ - jam(); - - TableList::DataBufferIterator it; - - if(!m_tableList.position(it, m_currentTable)){ - completeDropTrigger(signal); - return; - } - - SubscriptionPtr subPtr; - suma.c_subscriptions.getPtr(subPtr, m_subscriptionPtrI); - ndbrequire(subPtr.p->m_syncPtrI == ptrI); - - const Uint32 RT_BREAK = 48; - Uint32 latestTriggerId = 0; - for(Uint32 i = 0; i<RT_BREAK && !it.isNull(); i++, m_tableList.next(it)){ - jam(); - TablePtr tabPtr; -#if 0 - ndbout_c("nextDropTrigger tableid %u", *it.data); -#endif - ndbrequire(suma.c_tables.find(tabPtr, * it.data)); - - for(Uint32 j = 0; j<3; j++){ - jam(); - ndbrequire(tabPtr.p->m_triggerIds[j] != ILLEGAL_TRIGGER_ID); - i++; - latestTriggerId = tabPtr.p->m_triggerIds[j]; - if(tabPtr.p->m_hasTriggerDefined[j] == 1) { - jam(); - - DropTrigReq * const req = (DropTrigReq*)signal->getDataPtrSend(); - req->setConnectionPtr(ptrI); - req->setUserRef(SUMA_REF); // Sending to myself - req->setRequestType(DropTrigReq::RT_USER); - req->setTriggerType(TriggerType::SUBSCRIPTION_BEFORE); - req->setTriggerActionTime(TriggerActionTime::TA_DETACHED); - req->setIndexId(RNIL); - - req->setTableId(tabPtr.p->m_tableId); - 
req->setTriggerId(latestTriggerId); - req->setTriggerEvent((TriggerEvent::Value)j); - -#if 0 - ndbout_c("DROPPING trigger %u = %u %u %u on table %u[%u]", - latestTriggerId,TriggerType::SUBSCRIPTION_BEFORE, - TriggerActionTime::TA_DETACHED, j, tabPtr.p->m_tableId, j); -#endif - suma.sendSignal(DBTUP_REF, GSN_DROP_TRIG_REQ, - signal, DropTrigReq::SignalLength, JBB); - } else { - jam(); - ndbrequire(tabPtr.p->m_hasTriggerDefined[j] > 1); - /** - * Faking that a trigger has been dropped in order to - * simulate the proper behaviour. - * Perhaps this should be a dummy signal instead of - * (ab)using DROP_TRIG_CONF. - */ - DropTrigConf * conf = (DropTrigConf*)signal->getDataPtrSend(); - conf->setConnectionPtr(ptrI); - conf->setTableId(tabPtr.p->m_tableId); - conf->setTriggerId(latestTriggerId); - suma.sendSignal(SUMA_REF,GSN_DROP_TRIG_CONF, - signal, DropTrigConf::SignalLength, JBB); - } - } - m_currentTable++; - } - m_latestTriggerId = latestTriggerId; -} - -void -SumaParticipant::SyncRecord::runDROP_TRIG_REF(Signal* signal){ - jam(); - DropTrigRef * const ref = (DropTrigRef*)signal->getDataPtr(); - if (ref->getErrorCode() != DropTrigRef::TriggerNotFound){ - ndbrequire(false); - } - const Uint32 triggerId = ref->getTriggerId(); - Uint32 tableId = ref->getTableId(); - runDropTrig(signal, triggerId, tableId); -} - -void -SumaParticipant::SyncRecord::runDROP_TRIG_CONF(Signal* signal){ - jam(); - - DropTrigConf * const conf = (DropTrigConf*)signal->getDataPtr(); - const Uint32 triggerId = conf->getTriggerId(); - Uint32 tableId = conf->getTableId(); - runDropTrig(signal, triggerId, tableId); -} - -void -SumaParticipant::SyncRecord::runDropTrig(Signal* signal, - Uint32 triggerId, - Uint32 tableId){ - Uint32 type = (triggerId >> 16) & 0x3; - - TablePtr tabPtr; - ndbrequire(suma.c_tables.find(tabPtr, tableId)); - - ndbrequire(type < 3); - ndbrequire(tabPtr.p->m_triggerIds[type] == triggerId); - tabPtr.p->m_hasTriggerDefined[type]--; - if (tabPtr.p->m_hasTriggerDefined[type] == 
0) { - jam(); - tabPtr.p->m_triggerIds[type] = ILLEGAL_TRIGGER_ID; - } - if(triggerId == m_latestTriggerId){ - jam(); - nextDropTrigger(signal); - } -} - -void -SumaParticipant::SyncRecord::completeDropTrigger(Signal* signal){ - jam(); - SubscriptionPtr subPtr; - CRASH_INSERTION(13014); -#if 0 - ndbout_c("trigger completed"); -#endif - - suma.c_subscriptions.getPtr(subPtr, m_subscriptionPtrI); - ndbrequire(subPtr.p->m_syncPtrI == ptrI); - - bool found = false; - SubscriberPtr subbPtr; - for(suma.c_prepDataSubscribers.first(subbPtr); - !subbPtr.isNull(); suma.c_prepDataSubscribers.next(subbPtr)) { - jam(); - if(subbPtr.p->m_subPtrI == subPtr.i) { - jam(); - found = true; - break; - } - } - ndbrequire(found); - suma.sendSubStopComplete(signal, subbPtr); -} - -/********************************************************** * Scan data interface * * Assumption: one execTRANSID_AI contains all attr info @@ -2645,705 +1692,6 @@ SumaParticipant::execTRANSID_AI(Signal* signal){ f_bufferLock = 0; } -/********************************************************** - * - * Trigger data interface - * - */ - -void -SumaParticipant::execTRIG_ATTRINFO(Signal* signal){ - jamEntry(); - - CRASH_INSERTION(13016); - TrigAttrInfo* const trg = (TrigAttrInfo*)signal->getDataPtr(); - const Uint32 trigId = trg->getTriggerId(); - - const Uint32 dataLen = signal->length() - TrigAttrInfo::StaticLength; - - if(trg->getAttrInfoType() == TrigAttrInfo::BEFORE_VALUES){ - jam(); - - ndbrequire(b_bufferLock == trigId); - - memcpy(b_buffer + b_trigBufferSize, trg->getData(), 4 * dataLen); - b_trigBufferSize += dataLen; - // printf("before values %u %u %u\n",trigId, dataLen, b_trigBufferSize); - } else { - jam(); - - if(f_bufferLock == 0){ - f_bufferLock = trigId; - f_trigBufferSize = 0; - b_bufferLock = trigId; - b_trigBufferSize = 0; - } else { - ndbrequire(f_bufferLock == trigId); - } - - memcpy(f_buffer + f_trigBufferSize, trg->getData(), 4 * dataLen); - f_trigBufferSize += dataLen; - } -} - -#ifdef 
NODEFAIL_DEBUG2 -static int theCounts[64] = {0}; -#endif - -Uint32 -Suma::getStoreBucket(Uint32 v) -{ - // id will contain id to responsible suma or - // RNIL if we don't have nodegroup info yet - - const Uint32 N = NO_OF_BUCKETS; - const Uint32 D = v % N; // Distibution key - return D; -} - -Uint32 -Suma::getResponsibleSumaNodeId(Uint32 D) -{ - // id will contain id to responsible suma or - // RNIL if we don't have nodegroup info yet - - Uint32 id; - - if (c_restartLock) { - jam(); - // ndbout_c("c_restartLock"); - id = RNIL; - } else { - jam(); - id = RNIL; - const Uint32 n = c_noNodesInGroup; // Number nodes in node group - const Uint32 C1 = D / n; - const Uint32 C2 = D - C1*n; // = D % n; - const Uint32 C = C2 + C1 % n; - for (Uint32 i = 0; i < n; i++) { - jam(); - id = c_nodesInGroup[(C + i) % n]; - if (c_aliveNodes.get(id) && - !c_preparingNodes.get(id)) { - jam(); - break; - }//if - } -#ifdef NODEFAIL_DEBUG2 - theCounts[id]++; - ndbout_c("Suma:responsible n=%u, D=%u, id = %u, count=%u", - n,D, id, theCounts[id]); -#endif - } - return id; -} - -Uint32 -SumaParticipant::decideWhoToSend(Uint32 nBucket, Uint32 gci){ - bool replicaFlag = true; - Uint32 nId = RNIL; - - // bucket active/not active set by GCP_COMPLETE - if (c_buckets[nBucket].active) { - if (c_buckets[nBucket].handover && c_buckets[nBucket].handoverGCI <= gci) { - jam(); - replicaFlag = true; // let the other node send this - nId = RNIL; - // mark this as started, if we get a node failiure now we have some lost stuff - c_buckets[nBucket].handover_started = true; - } else { - jam(); - replicaFlag = false; - nId = refToNode(reference()); - } - } else { - nId = getResponsibleSumaNodeId(nBucket); - replicaFlag = !(nId == refToNode(reference())); - - if (!replicaFlag) { - if (!c_buckets[nBucket].handover) { - jam(); - // appearently a node has failed and we are taking over sending - // from that bucket. Now we need to go back to latest completed - // GCI. 
Handling will depend on Subscriber and Subscription - - // TODO, for now we make an easy takeover - if (gci < c_nodeFailGCI) - c_lastInconsistentGCI = gci; - - // we now have responsability for this bucket and we're actively - // sending from that - c_buckets[nBucket].active = true; -#ifdef HANDOVER_DEBUG - ndbout_c("Takeover Bucket %u", nBucket); -#endif - } else if (c_buckets[nBucket].handoverGCI > gci) { - jam(); - replicaFlag = true; // handover going on, but don't start sending yet - nId = RNIL; - } else { - jam(); -#ifdef HANDOVER_DEBUG - ndbout_c("Possible error: Will send from GCI = %u", gci); -#endif - } - } - } - -#ifdef NODEFAIL_DEBUG2 - ndbout_c("Suma:bucket %u, responsible id = %u, replicaFlag = %u", - nBucket, nId, (Uint32)replicaFlag); -#endif - return replicaFlag; -} - -void -SumaParticipant::execFIRE_TRIG_ORD(Signal* signal){ - jamEntry(); - - CRASH_INSERTION(13016); - FireTrigOrd* const trg = (FireTrigOrd*)signal->getDataPtr(); - const Uint32 trigId = trg->getTriggerId(); - const Uint32 hashValue = trg->getHashValue(); - const Uint32 gci = trg->getGCI(); - const Uint32 event = trg->getTriggerEvent(); - const Uint32 triggerId = trg->getTriggerId(); - Uint32 tableId = triggerId & 0xFFFF; - - ndbrequire(f_bufferLock == trigId); - -#ifdef EVENT_DEBUG2 - ndbout_c("SumaParticipant::execFIRE_TRIG_ORD"); -#endif - - Uint32 sz = trg->getNoOfPrimaryKeyWords()+trg->getNoOfAfterValueWords(); - ndbrequire(sz == f_trigBufferSize); - - /** - * Reformat as "all headers" + "all data" - */ - Uint32 dataLen = 0; - Uint32 noOfAttrs = 0; - Uint32 * src = f_buffer; - Uint32 * headers = signal->theData + 25; - Uint32 * dst = signal->theData + 25 + MAX_ATTRIBUTES_IN_TABLE; - - LinearSectionPtr ptr[3]; - int nptr; - - ptr[0].p = headers; - ptr[1].p = dst; - - while(sz > 0){ - jam(); - Uint32 tmp = * src ++; - * headers ++ = tmp; - Uint32 len = AttributeHeader::getDataSize(tmp); - memcpy(dst, src, 4 * len); - dst += len; - src += len; - - noOfAttrs++; - dataLen += len; - 
sz -= (1 + len); - } - ndbrequire(sz == 0); - - ptr[0].sz = noOfAttrs; - ptr[1].sz = dataLen; - - if (b_trigBufferSize > 0) { - jam(); - ptr[2].p = b_buffer; - ptr[2].sz = b_trigBufferSize; - nptr = 3; - } else { - jam(); - nptr = 2; - } - - // right now only for tableEvent - bool replicaFlag = decideWhoToSend(getStoreBucket(hashValue), gci); - - /** - * Signal to subscriber(s) - */ - SubTableData * data = (SubTableData*)signal->getDataPtrSend();//trg; - data->gci = gci; - data->tableId = tableId; - data->operation = event; - data->noOfAttributes = noOfAttrs; - data->dataSize = dataLen; - - SubscriberPtr subbPtr; - for(c_dataSubscribers.first(subbPtr); !subbPtr.isNull(); - c_dataSubscribers.next(subbPtr)){ - if (subbPtr.p->m_firstGCI > gci) { -#ifdef EVENT_DEBUG - ndbout_c("m_firstGCI = %u, gci = %u", subbPtr.p->m_firstGCI, gci); -#endif - jam(); - // we're either restarting or it's a newly created subscriber - // and waiting for the right gci - continue; - } - - jam(); - - const Uint32 ref = subbPtr.p->m_subscriberRef; - // ndbout_c("ref = %u", ref); - const Uint32 subdata = subbPtr.p->m_subscriberData; - data->senderData = subdata; - /* - * get subscription ptr for this subscriber - */ - SubscriptionPtr subPtr; - c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI); - - if(!subPtr.p->m_tables[tableId]) { - jam(); - continue; - //continue in for-loop if the table is not part of - //the subscription. Otherwise, send data to subscriber. 
- } - - if (subPtr.p->m_subscriptionType == SubCreateReq::TableEvent) { - if (replicaFlag) { - jam(); - c_failoverBuffer.subTableData(gci,NULL,0); - continue; - } - jam(); - Uint32 tmp = data->logType; - if (c_lastInconsistentGCI == data->gci) { - data->setGCINotConsistent(); - } - -#ifdef HANDOVER_DEBUG - { - static int aLongGCIName = 0; - if (data->gci != aLongGCIName) { - aLongGCIName = data->gci; - ndbout_c("sent from GCI = %u", aLongGCIName); - } - } -#endif - sendSignal(ref, GSN_SUB_TABLE_DATA, signal, - SubTableData::SignalLength, JBB, ptr, nptr); - data->logType = tmp; - } else { - ndbassert(refToNode(ref) == 0 || refToNode(ref) == getOwnNodeId()); - jam(); -#if PRINT_ONLY - ndbout_c("GSN_SUB_TABLE_DATA to %s: op: %d #attr: %d len: %d", - getBlockName(refToBlock(ref)), - noOfAttrs, dataLen); - -#else -#ifdef HANDOVER_DEBUG - { - static int aLongGCIName2 = 0; - if (data->gci != aLongGCIName2) { - aLongGCIName2 = data->gci; - ndbout_c("(EXECUTE_DIRECT) sent from GCI = %u to %u", aLongGCIName2, ref); - } - } -#endif - EXECUTE_DIRECT(refToBlock(ref), GSN_SUB_TABLE_DATA, signal, - SubTableData::SignalLength); - jamEntry(); -#endif - } - } - - /** - * Reset f_bufferLock - */ - f_bufferLock = 0; - b_bufferLock = 0; -} - -void -SumaParticipant::execSUB_GCP_COMPLETE_REP(Signal* signal){ - jamEntry(); - - SubGcpCompleteRep * rep = (SubGcpCompleteRep*)signal->getDataPtrSend(); - - Uint32 gci = rep->gci; - c_lastCompleteGCI = gci; - - /** - * Signal to subscriber(s) - */ - - SubscriberPtr subbPtr; - SubscriptionPtr subPtr; - c_dataSubscribers.first(subbPtr); - for(; !subbPtr.isNull(); c_dataSubscribers.next(subbPtr)){ - - if (subbPtr.p->m_firstGCI > gci) { - jam(); - // we don't send SUB_GCP_COMPLETE_REP for incomplete GCI's - continue; - } - - const Uint32 ref = subbPtr.p->m_subscriberRef; - rep->senderRef = ref; - rep->senderData = subbPtr.p->m_subscriberData; - - c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI); -#if PRINT_ONLY - 
ndbout_c("GSN_SUB_GCP_COMPLETE_REP to %s:", - getBlockName(refToBlock(ref))); -#else - - CRASH_INSERTION(13018); - - if (subPtr.p->m_subscriptionType == SubCreateReq::TableEvent) - { - jam(); - sendSignal(ref, GSN_SUB_GCP_COMPLETE_REP, signal, - SubGcpCompleteRep::SignalLength, JBB); - } - else - { - jam(); - ndbassert(refToNode(ref) == 0 || refToNode(ref) == getOwnNodeId()); - EXECUTE_DIRECT(refToBlock(ref), GSN_SUB_GCP_COMPLETE_REP, signal, - SubGcpCompleteRep::SignalLength); - jamEntry(); - } -#endif - } - - if (c_handoverToDo) { - jam(); - c_handoverToDo = false; - for( int i = 0; i < NO_OF_BUCKETS; i++) { - if (c_buckets[i].handover) { - if (c_buckets[i].handoverGCI > gci) { - jam(); - c_handoverToDo = true; // still waiting for the right GCI - break; /* since all handover should happen at the same time - * we can break here - */ - } else { - c_buckets[i].handover = false; -#ifdef HANDOVER_DEBUG - ndbout_c("Handover Bucket %u", i); -#endif - if (getResponsibleSumaNodeId(i) == refToNode(reference())) { - // my bucket to be handed over to me - ndbrequire(!c_buckets[i].active); - jam(); - c_buckets[i].active = true; - } else { - // someone else's bucket to handover to - ndbrequire(c_buckets[i].active); - jam(); - c_buckets[i].active = false; - } - } - } - } - } -} - -/*********************************************************** - * - * Embryo to syncronize the Suma's so as to know if a subscriber - * has received a GCP_COMPLETE from all suma's or not - * - */ - -void -SumaParticipant::runSUB_GCP_COMPLETE_ACC(Signal* signal){ - jam(); - - SubGcpCompleteAcc * const acc = (SubGcpCompleteAcc*)signal->getDataPtr(); - - Uint32 gci = acc->rep.gci; - -#ifdef EVENT_DEBUG - ndbout_c("SumaParticipant::runSUB_GCP_COMPLETE_ACC gci = %u", gci); -#endif - - c_failoverBuffer.subGcpCompleteRep(gci); -} - -void -Suma::execSUB_GCP_COMPLETE_ACC(Signal* signal){ - jamEntry(); - - if (RtoI(signal->getSendersBlockRef(), false) != RNIL) { - jam(); - // Ack from other SUMA - 
runSUB_GCP_COMPLETE_ACC(signal); - return; - } - - jam(); - // Ack from User and not an acc from other SUMA, redistribute in nodegroup - - SubGcpCompleteAcc * const acc = (SubGcpCompleteAcc*)signal->getDataPtr(); - Uint32 gci = acc->rep.gci; - Uint32 senderRef = acc->rep.senderRef; - Uint32 subscriberData = acc->rep.subscriberData; - -#ifdef EVENT_DEBUG - ndbout_c("Suma::execSUB_GCP_COMPLETE_ACC gci = %u", gci); -#endif - bool moreToCome = false; - - SubscriberPtr subbPtr; - for(c_dataSubscribers.first(subbPtr); - !subbPtr.isNull(); c_dataSubscribers.next(subbPtr)){ -#ifdef EVENT_DEBUG - ndbout_c("Suma::execSUB_GCP_COMPLETE_ACC %u == %u && %u == %u", - subbPtr.p->m_subscriberRef, - senderRef, - subbPtr.p->m_subscriberData, - subscriberData); -#endif - if (subbPtr.p->m_subscriberRef == senderRef && - subbPtr.p->m_subscriberData == subscriberData) { - jam(); -#ifdef EVENT_DEBUG - ndbout_c("Suma::execSUB_GCP_COMPLETE_ACC gci = FOUND SUBSCRIBER"); -#endif - subbPtr.p->m_lastGCI = gci; - } else if (subbPtr.p->m_lastGCI < gci) { - jam(); - if (subbPtr.p->m_firstGCI <= gci) - moreToCome = true; - } else - jam(); - } - - if (!moreToCome) { - // tell the other SUMA's that I'm done with this GCI - jam(); - for (Uint32 i = 0; i < c_noNodesInGroup; i++) { - Uint32 id = c_nodesInGroup[i]; - Uint32 ref = calcSumaBlockRef(id); - if ((ref != reference()) && c_aliveNodes.get(id)) { - jam(); - sendSignal(ref, GSN_SUB_GCP_COMPLETE_ACC, signal, - SubGcpCompleteAcc::SignalLength, JBB); - } else - jam(); - } - } -} - -static Uint32 tmpFailoverBuffer[512]; -//SumaParticipant::FailoverBuffer::FailoverBuffer(DataBuffer<15>::DataBufferPool & p) -// : m_dataList(p), -SumaParticipant::FailoverBuffer::FailoverBuffer() - : - c_gcis(tmpFailoverBuffer), c_sz(512), c_first(0), c_next(0), c_full(false) -{ -} - -bool SumaParticipant::FailoverBuffer::subTableData(Uint32 gci, Uint32 *src, int sz) -{ - bool ok = true; - - if (c_full) { - ok = false; -#ifdef EVENT_DEBUG - 
ndbout_c("Suma::FailoverBuffer::SubTableData buffer full gci=%u"); -#endif - } else { - c_gcis[c_next] = gci; - c_next++; - if (c_next == c_sz) c_next = 0; - if (c_next == c_first) - c_full = true; - // ndbout_c("%u %u %u",c_first,c_next,c_sz); - } - return ok; -} -bool SumaParticipant::FailoverBuffer::subGcpCompleteRep(Uint32 gci) -{ - bool ok = true; - - // ndbout_c("Empty"); - while (true) { - if (c_first == c_next && !c_full) - break; - if (c_gcis[c_first] > gci) - break; - c_full = false; - c_first++; - if (c_first == c_sz) c_first = 0; - // ndbout_c("%u %u %u : ",c_first,c_next,c_sz); - } - - return ok; -} -bool SumaParticipant::FailoverBuffer::nodeFailRep() -{ - bool ok = true; - while (true) { - if (c_first == c_next && !c_full) - break; - -#ifdef EVENT_DEBUG - ndbout_c("Suma::FailoverBuffer::NodeFailRep resending gci=%u", c_gcis[c_first]); -#endif - c_full = false; - c_first++; - if (c_first == c_sz) c_first = 0; - } - return ok; -} - -/********************************************************** - * Suma participant interface - * - * Stopping and removing of subscriber - * - */ - -void -SumaParticipant::execSUB_STOP_REQ(Signal* signal){ - jamEntry(); - - CRASH_INSERTION(13019); - - SubStopReq * const req = (SubStopReq*)signal->getDataPtr(); - Uint32 senderRef = signal->getSendersBlockRef(); - Uint32 senderData = req->senderData; - Uint32 subscriberRef = req->subscriberRef; - Uint32 subscriberData = req->subscriberData; - SubscriptionPtr subPtr; - Subscription key; - key.m_subscriptionId = req->subscriptionId; - key.m_subscriptionKey = req->subscriptionKey; - Uint32 part = req->part; - - if (key.m_subscriptionKey == 0 && - key.m_subscriptionId == 0 && - subscriberData == 0) { - SubStopConf* conf = (SubStopConf*)signal->getDataPtrSend(); - - conf->senderRef = reference(); - conf->senderData = senderData; - conf->subscriptionId = key.m_subscriptionId; - conf->subscriptionKey = key.m_subscriptionKey; - conf->subscriberData = subscriberData; - - 
sendSignal(senderRef, GSN_SUB_STOP_CONF, signal, - SubStopConf::SignalLength, JBB); - - removeSubscribersOnNode(signal, refToNode(subscriberRef)); - return; - } - - if(!c_subscriptions.find(subPtr, key)){ - jam(); - sendSubStopRef(signal, GrepError::SUBSCRIPTION_ID_NOT_FOUND); - return; - } - - ndbrequire(part == SubscriptionData::TableData); - - SubscriberPtr subbPtr; - if (senderRef == reference()){ - jam(); - c_subscriberPool.getPtr(subbPtr, senderData); - ndbrequire(subbPtr.p->m_subPtrI == subPtr.i && - subbPtr.p->m_subscriberRef == subscriberRef && - subbPtr.p->m_subscriberData == subscriberData); - c_removeDataSubscribers.remove(subbPtr); - } else { - bool found = false; - jam(); - c_dataSubscribers.first(subbPtr); - for (;!subbPtr.isNull(); c_dataSubscribers.next(subbPtr)){ - jam(); - if (subbPtr.p->m_subPtrI == subPtr.i && - subbPtr.p->m_subscriberRef == subscriberRef && - subbPtr.p->m_subscriberData == subscriberData){ - // ndbout_c("STOP_REQ: before c_dataSubscribers.release"); - jam(); - c_dataSubscribers.remove(subbPtr); - found = true; - break; - } - } - /** - * If we didn't find anyone, send ref - */ - if (!found) { - jam(); - sendSubStopRef(signal, GrepError::SUBSCRIBER_NOT_FOUND); - return; - } - } - - subbPtr.p->m_senderRef = senderRef; // store ref to requestor - subbPtr.p->m_senderData = senderData; // store ref to requestor - c_prepDataSubscribers.add(subbPtr); - - Ptr<SyncRecord> syncPtr; - c_syncPool.getPtr(syncPtr, subPtr.p->m_syncPtrI); - if (syncPtr.p->m_locked) { - jam(); - sendSubStopRef(signal, /** Error Code */ 0, true); - return; - } - syncPtr.p->m_locked = true; - - syncPtr.p->startDropTrigger(signal); -} - -void -SumaParticipant::sendSubStopComplete(Signal* signal, SubscriberPtr subbPtr){ - jam(); - - CRASH_INSERTION(13020); - - SubscriptionPtr subPtr; - c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI); - - Ptr<SyncRecord> syncPtr; - c_syncPool.getPtr(syncPtr, subPtr.p->m_syncPtrI); - syncPtr.p->m_locked = false; - - SubStopConf 
* const conf = (SubStopConf*)signal->getDataPtrSend(); - - conf->senderRef = reference(); - conf->senderData = subbPtr.p->m_senderData; - conf->subscriptionId = subPtr.p->m_subscriptionId; - conf->subscriptionKey = subPtr.p->m_subscriptionKey; - conf->subscriberData = subbPtr.p->m_subscriberData; - Uint32 senderRef = subbPtr.p->m_senderRef; - - c_prepDataSubscribers.release(subbPtr); - sendSignal(senderRef, GSN_SUB_STOP_CONF, signal, - SubStopConf::SignalLength, JBB); -} - -void -SumaParticipant::sendSubStopRef(Signal* signal, Uint32 errCode, - bool temporary){ - jam(); - SubStopRef * ref = (SubStopRef *)signal->getDataPtrSend(); - ref->senderRef = reference(); - ref->errorCode = errCode; - if (temporary) { - ref->setTemporary(); - } - sendSignal(signal->getSendersBlockRef(), - GSN_SUB_STOP_REF, - signal, - SubStopRef::SignalLength, - JBB); - return; -} - /************************************************************** * * Removing subscription @@ -3374,36 +1722,6 @@ SumaParticipant::execSUB_REMOVE_REQ(Signal* signal) { { jam(); SubscriberPtr i_subbPtr; - for(c_prepDataSubscribers.first(i_subbPtr); - !i_subbPtr.isNull(); c_prepDataSubscribers.next(i_subbPtr)){ - jam(); - if( i_subbPtr.p->m_subPtrI == subPtr.i ) { - jam(); - sendSubRemoveRef(signal, req, /* ErrorCode */ 0, true); - return; - // c_prepDataSubscribers.release(subbPtr); - } - } - c_dataSubscribers.first(i_subbPtr); - while(!i_subbPtr.isNull()){ - jam(); - SubscriberPtr subbPtr = i_subbPtr; - c_dataSubscribers.next(i_subbPtr); - if( subbPtr.p->m_subPtrI == subPtr.i ) { - jam(); - sendSubRemoveRef(signal, req, /* ErrorCode */ 0, true); - return; - /* Unfinished/untested code. If remove should be possible - * even if subscribers are left these have to be stopped - * first. See m_markRemove, m_nSubscribers. We need also to - * block remove for this subscription so that multiple - * removes is not possible... 
- */ - c_dataSubscribers.remove(subbPtr); - c_removeDataSubscribers.add(subbPtr); - count++; - } - } c_metaSubscribers.first(i_subbPtr); while(!i_subbPtr.isNull()){ jam(); @@ -3419,15 +1737,7 @@ SumaParticipant::execSUB_REMOVE_REQ(Signal* signal) { subPtr.p->m_senderRef = senderRef; subPtr.p->m_senderData = req.senderData; - if (count > 0){ - jam(); - ndbrequire(false); // code not finalized - subPtr.p->m_markRemove = true; - subPtr.p->m_nSubscribers = count; - sendSubStopReq(signal); - } else { - completeSubRemoveReq(signal, subPtr); - } + completeSubRemoveReq(signal, subPtr); } void @@ -3492,6 +1802,8 @@ SumaParticipant::sendSubRemoveRef(Signal* signal, const SubRemoveReq& req, jam(); SubRemoveRef * ref = (SubRemoveRef *)signal->getDataPtrSend(); ref->senderRef = reference(); + ref->subscriptionId = req.subscriptionId; + ref->subscriptionKey = req.subscriptionKey; ref->senderData = req.senderData; ref->err = errCode; if (temporary) @@ -3522,486 +1834,5 @@ SumaParticipant::SyncRecord::release(){ attrBuf.release(); } - -/************************************************************** - * - * Restarting remote node functions, master functionality - * (slave does nothing special) - * - triggered on INCL_NODEREQ calling startNode - * - included node will issue START_ME when it's ready to start - * the subscribers - * - */ - -Suma::Restart::Restart(Suma& s) : suma(s) { - for (int i = 0; i < MAX_REPLICAS; i++) { - c_okToStart[i] = false; - c_waitingToStart[i] = false; - } -} - -void -Suma::Restart::resetNode(Uint32 sumaRef) -{ - jam(); - int I = suma.RtoI(sumaRef); - c_okToStart[I] = false; - c_waitingToStart[I] = false; -} - -void -Suma::Restart::startNode(Signal* signal, Uint32 sumaRef) -{ - jam(); - resetNode(sumaRef); - - // right now we can only handle restarting one node - // at a time in a node group - - createSubscription(signal, sumaRef); -} - -void -Suma::Restart::createSubscription(Signal* signal, Uint32 sumaRef) { - jam(); - 
suma.c_subscriptions.first(c_subPtr); - nextSubscription(signal, sumaRef); -} - -void -Suma::Restart::nextSubscription(Signal* signal, Uint32 sumaRef) { - jam(); - if (c_subPtr.isNull()) { - jam(); - completeSubscription(signal, sumaRef); - return; - } - SubscriptionPtr subPtr; - subPtr.i = c_subPtr.curr.i; - subPtr.p = suma.c_subscriptions.getPtr(subPtr.i); - - suma.c_subscriptions.next(c_subPtr); - - SubCreateReq * req = (SubCreateReq *)signal->getDataPtrSend(); - - req->subscriberRef = suma.reference(); - req->subscriberData = subPtr.i; - req->subscriptionId = subPtr.p->m_subscriptionId; - req->subscriptionKey = subPtr.p->m_subscriptionKey; - req->subscriptionType = subPtr.p->m_subscriptionType | - SubCreateReq::RestartFlag; - - switch (subPtr.p->m_subscriptionType) { - case SubCreateReq::TableEvent: - case SubCreateReq::SelectiveTableSnapshot: - case SubCreateReq::DatabaseSnapshot: { - jam(); - - Ptr<SyncRecord> syncPtr; - suma.c_syncPool.getPtr(syncPtr, subPtr.p->m_syncPtrI); - syncPtr.p->m_tableList.first(syncPtr.p->m_tableList_it); - - ndbrequire(!syncPtr.p->m_tableList_it.isNull()); - - req->tableId = *syncPtr.p->m_tableList_it.data; - -#if 0 - for (int i = 0; i < MAX_TABLES; i++) - if (subPtr.p->m_tables[i]) { - req->tableId = i; - break; - } -#endif - - suma.sendSignal(sumaRef, GSN_SUB_CREATE_REQ, signal, - SubCreateReq::SignalLength+1 /*to get table Id*/, JBB); - return; - } - case SubCreateReq::SingleTableScan : - // TODO - jam(); - return; - } - ndbrequire(false); -} - -void -Suma::execSUB_CREATE_CONF(Signal* signal) { - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUB_CREATE_CONF"); -#endif - - const Uint32 senderRef = signal->senderBlockRef(); - - SubCreateConf * const conf = (SubCreateConf *)signal->getDataPtr(); - - Subscription key; - const Uint32 subscriberData = conf->subscriberData; - key.m_subscriptionId = conf->subscriptionId; - key.m_subscriptionKey = conf->subscriptionKey; - - SubscriptionPtr subPtr; - 
ndbrequire(c_subscriptions.find(subPtr, key)); - - switch(subPtr.p->m_subscriptionType) { - case SubCreateReq::TableEvent: - case SubCreateReq::SelectiveTableSnapshot: - case SubCreateReq::DatabaseSnapshot: - { - Ptr<SyncRecord> syncPtr; - c_syncPool.getPtr(syncPtr, subPtr.p->m_syncPtrI); - - syncPtr.p->m_tableList.next(syncPtr.p->m_tableList_it); - if (syncPtr.p->m_tableList_it.isNull()) { - jam(); - SubSyncReq *req = (SubSyncReq *)signal->getDataPtrSend(); - - req->subscriptionId = key.m_subscriptionId; - req->subscriptionKey = key.m_subscriptionKey; - req->subscriberData = subscriberData; - req->part = (Uint32) SubscriptionData::MetaData; - - sendSignal(senderRef, GSN_SUB_SYNC_REQ, signal, - SubSyncReq::SignalLength, JBB); - } else { - jam(); - SubCreateReq * req = (SubCreateReq *)signal->getDataPtrSend(); - - req->subscriberRef = reference(); - req->subscriberData = subPtr.i; - req->subscriptionId = subPtr.p->m_subscriptionId; - req->subscriptionKey = subPtr.p->m_subscriptionKey; - req->subscriptionType = subPtr.p->m_subscriptionType | - SubCreateReq::RestartFlag | - SubCreateReq::AddTableFlag; - - req->tableId = *syncPtr.p->m_tableList_it.data; - - sendSignal(senderRef, GSN_SUB_CREATE_REQ, signal, - SubCreateReq::SignalLength+1 /*to get table Id*/, JBB); - } - } - return; - case SubCreateReq::SingleTableScan: - ndbrequire(false); - } - ndbrequire(false); -} - -void -Suma::execSUB_CREATE_REF(Signal* signal) { - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUB_CREATE_REF"); -#endif - //ndbrequire(false); -} - -void -Suma::execSUB_SYNC_CONF(Signal* signal) { - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUB_SYNC_CONF"); -#endif - Uint32 sumaRef = signal->getSendersBlockRef(); - - SubSyncConf *conf = (SubSyncConf *)signal->getDataPtr(); - Subscription key; - - key.m_subscriptionId = conf->subscriptionId; - key.m_subscriptionKey = conf->subscriptionKey; - // SubscriptionData::Part part = (SubscriptionData::Part)conf->part; - // const Uint32 
subscriberData = conf->subscriberData; - - SubscriptionPtr subPtr; - c_subscriptions.find(subPtr, key); - - switch(subPtr.p->m_subscriptionType) { - case SubCreateReq::TableEvent: - case SubCreateReq::SelectiveTableSnapshot: - case SubCreateReq::DatabaseSnapshot: - jam(); - Restart.nextSubscription(signal, sumaRef); - return; - case SubCreateReq::SingleTableScan: - ndbrequire(false); - return; - } - ndbrequire(false); -} - -void -Suma::execSUB_SYNC_REF(Signal* signal) { - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUB_SYNC_REF"); -#endif - //ndbrequire(false); -} - -void -Suma::execSUMA_START_ME(Signal* signal) { - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUMA_START_ME"); -#endif - - Restart.runSUMA_START_ME(signal, signal->getSendersBlockRef()); -} - -void -Suma::Restart::runSUMA_START_ME(Signal* signal, Uint32 sumaRef) { - int I = suma.RtoI(sumaRef); - - // restarting Suma is ready for SUB_START_REQ - if (c_waitingToStart[I]) { - // we've waited with startSubscriber since restarting suma was not ready - c_waitingToStart[I] = false; - startSubscriber(signal, sumaRef); - } else { - // do startSubscriber as soon as its time - c_okToStart[I] = true; - } -} - -void -Suma::Restart::completeSubscription(Signal* signal, Uint32 sumaRef) { - jam(); - int I = suma.RtoI(sumaRef); - - if (c_okToStart[I]) {// otherwise will start when START_ME comes - c_okToStart[I] = false; - startSubscriber(signal, sumaRef); - } else { - c_waitingToStart[I] = true; - } -} - -void -Suma::Restart::startSubscriber(Signal* signal, Uint32 sumaRef) { - jam(); - suma.c_dataSubscribers.first(c_subbPtr); - nextSubscriber(signal, sumaRef); -} - -void -Suma::Restart::sendSubStartReq(SubscriptionPtr subPtr, SubscriberPtr subbPtr, - Signal* signal, Uint32 sumaRef) -{ - jam(); - SubStartReq * req = (SubStartReq *)signal->getDataPtrSend(); - - req->senderRef = suma.reference(); - req->senderData = subbPtr.p->m_senderData; - req->subscriptionId = subPtr.p->m_subscriptionId; - 
req->subscriptionKey = subPtr.p->m_subscriptionKey; - req->part = SubscriptionData::TableData; - req->subscriberData = subbPtr.p->m_subscriberData; - req->subscriberRef = subbPtr.p->m_subscriberRef; - - // restarting suma will not respond to this until startphase 5 - // since it is not until then data copying has been completed -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::Restart::sendSubStartReq sending GSN_SUB_START_REQ id=%u key=%u", - req->subscriptionId, req->subscriptionKey); -#endif - suma.sendSignal(sumaRef, GSN_SUB_START_REQ, - signal, SubStartReq::SignalLength2, JBB); -} - -void -Suma::execSUB_START_CONF(Signal* signal) { - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUB_START_CONF"); -#endif - Uint32 sumaRef = signal->getSendersBlockRef(); - Restart.nextSubscriber(signal, sumaRef); -} - -void -Suma::execSUB_START_REF(Signal* signal) { - jamEntry(); -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUB_START_REF"); -#endif - //ndbrequire(false); -} - -void -Suma::Restart::nextSubscriber(Signal* signal, Uint32 sumaRef) { - jam(); - if (c_subbPtr.isNull()) { - jam(); - completeSubscriber(signal, sumaRef); - return; - } - - SubscriberPtr subbPtr = c_subbPtr; - suma.c_dataSubscribers.next(c_subbPtr); - - /* - * get subscription ptr for this subscriber - */ - - SubscriptionPtr subPtr; - suma.c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI); - switch (subPtr.p->m_subscriptionType) { - case SubCreateReq::TableEvent: - case SubCreateReq::SelectiveTableSnapshot: - case SubCreateReq::DatabaseSnapshot: - { - jam(); - sendSubStartReq(subPtr, subbPtr, signal, sumaRef); -#if 0 - SubStartReq * req = (SubStartReq *)signal->getDataPtrSend(); - - req->senderRef = reference(); - req->senderData = subbPtr.p->m_senderData; - req->subscriptionId = subPtr.p->m_subscriptionId; - req->subscriptionKey = subPtr.p->m_subscriptionKey; - req->part = SubscriptionData::TableData; - req->subscriberData = subbPtr.p->m_subscriberData; - req->subscriberRef = 
subbPtr.p->m_subscriberRef; - - // restarting suma will not respond to this until startphase 5 - // since it is not until then data copying has been completed -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::nextSubscriber sending GSN_SUB_START_REQ id=%u key=%u", - req->subscriptionId, req->subscriptionKey); -#endif - suma.sendSignal(sumaRef, GSN_SUB_START_REQ, - signal, SubStartReq::SignalLength2, JBB); -#endif - } - return; - case SubCreateReq::SingleTableScan: - ndbrequire(false); - return; - } - ndbrequire(false); -} - -void -Suma::Restart::completeSubscriber(Signal* signal, Uint32 sumaRef) { - completeRestartingNode(signal, sumaRef); -} - -void -Suma::Restart::completeRestartingNode(Signal* signal, Uint32 sumaRef) { - jam(); - SumaHandoverReq * req = (SumaHandoverReq *)signal->getDataPtrSend(); - - req->gci = suma.getFirstGCI(signal); - - suma.sendSignal(sumaRef, GSN_SUMA_HANDOVER_REQ, signal, - SumaHandoverReq::SignalLength, JBB); -} - -// only run on restarting suma - -void -Suma::execSUMA_HANDOVER_REQ(Signal* signal) -{ - jamEntry(); - // Uint32 sumaRef = signal->getSendersBlockRef(); - SumaHandoverReq const * req = (SumaHandoverReq *)signal->getDataPtr(); - - Uint32 gci = req->gci; - Uint32 new_gci = getFirstGCI(signal); - - if (new_gci > gci) { - gci = new_gci; - } - - { // all recreated subscribers at restarting SUMA start at same GCI - SubscriberPtr subbPtr; - for(c_dataSubscribers.first(subbPtr); - !subbPtr.isNull(); - c_dataSubscribers.next(subbPtr)){ - subbPtr.p->m_firstGCI = gci; - } - } - -#ifdef NODEFAIL_DEBUG - ndbout_c("Suma::execSUMA_HANDOVER_REQ, gci = %u", gci); -#endif - - c_handoverToDo = false; - c_restartLock = false; - { -#ifdef HANDOVER_DEBUG - int c = 0; -#endif - for( int i = 0; i < NO_OF_BUCKETS; i++) { - jam(); - if (getResponsibleSumaNodeId(i) == refToNode(reference())) { -#ifdef HANDOVER_DEBUG - c++; -#endif - jam(); - c_buckets[i].active = false; - c_buckets[i].handoverGCI = gci; - c_buckets[i].handover = true; - 
c_buckets[i].handover_started = false; - c_handoverToDo = true; - } - } -#ifdef HANDOVER_DEBUG - ndbout_c("prepared handover of bucket %u buckets", c); -#endif - } - - for (Uint32 i = 0; i < c_noNodesInGroup; i++) { - jam(); - Uint32 ref = calcSumaBlockRef(c_nodesInGroup[i]); - if (ref != reference()) { - jam(); - sendSignal(ref, GSN_SUMA_HANDOVER_CONF, signal, - SumaHandoverConf::SignalLength, JBB); - }//if - } -} - -// only run on all but restarting suma -void -Suma::execSUMA_HANDOVER_CONF(Signal* signal) { - jamEntry(); - Uint32 sumaRef = signal->getSendersBlockRef(); - SumaHandoverConf const * conf = (SumaHandoverConf *)signal->getDataPtr(); - - Uint32 gci = conf->gci; - -#ifdef HANDOVER_DEBUG - ndbout_c("Suma::execSUMA_HANDOVER_CONF, gci = %u", gci); -#endif - - /* TODO, if we are restarting several SUMA's (>2 in a nodegroup) - * we have to collect all these conf's before proceding - */ - - // restarting node is now prepared and ready - c_preparingNodes.clear(refToNode(sumaRef)); /* !! 
important to do before - * below since it affects - * getResponsibleSumaNodeId() - */ - - c_handoverToDo = false; - // mark all active buckets really belonging to restarting SUMA - for( int i = 0; i < NO_OF_BUCKETS; i++) { - if (c_buckets[i].active) { - // I'm running this bucket - if (getResponsibleSumaNodeId(i) == refToNode(sumaRef)) { - // but it should really be the restarted node - c_buckets[i].handoverGCI = gci; - c_buckets[i].handover = true; - c_buckets[i].handover_started = false; - c_handoverToDo = true; - } - } - } -} - template void append(DataBuffer<11>&,SegmentedSectionPtr,SectionSegmentPool&); diff --git a/ndb/src/kernel/blocks/suma/Suma.hpp b/ndb/src/kernel/blocks/suma/Suma.hpp index 0bc56e51c4f..5cf1c4d543f 100644 --- a/ndb/src/kernel/blocks/suma/Suma.hpp +++ b/ndb/src/kernel/blocks/suma/Suma.hpp @@ -77,14 +77,6 @@ protected: void execSUB_SYNC_CONTINUE_CONF(Signal* signal); /** - * Trigger logging - */ - void execTRIG_ATTRINFO(Signal* signal); - void execFIRE_TRIG_ORD(Signal* signal); - void execSUB_GCP_COMPLETE_REP(Signal* signal); - void runSUB_GCP_COMPLETE_ACC(Signal* signal); - - /** * DIH signals */ void execDI_FCOUNTREF(Signal* signal); @@ -93,14 +85,6 @@ protected: void execDIGETPRIMCONF(Signal* signal); /** - * Trigger administration - */ - void execCREATE_TRIG_REF(Signal* signal); - void execCREATE_TRIG_CONF(Signal* signal); - void execDROP_TRIG_REF(Signal* signal); - void execDROP_TRIG_CONF(Signal* signal); - - /** * continueb */ void execCONTINUEB(Signal* signal); @@ -190,22 +174,6 @@ public: void completeMeta(Signal*); /** - * Create triggers - */ - Uint32 m_latestTriggerId; - void startTrigger(Signal* signal); - void nextTrigger(Signal* signal); - void completeTrigger(Signal* signal); - void createAttributeMask(AttributeMask&, Table*); - - /** - * Drop triggers - */ - void startDropTrigger(Signal* signal); - void nextDropTrigger(Signal* signal); - void completeDropTrigger(Signal* signal); - - /** * Sync data */ Uint32 m_currentTable; 
// Index in m_tableList @@ -229,18 +197,12 @@ public: suma.progError(line, cause, extra); } - void runLIST_TABLES_CONF(Signal* signal); void runGET_TABINFO_CONF(Signal* signal); void runGET_TABINFOREF(Signal* signal); void runDI_FCOUNTCONF(Signal* signal); void runDIGETPRIMCONF(Signal* signal); - void runCREATE_TRIG_CONF(Signal* signal); - void runDROP_TRIG_CONF(Signal* signal); - void runDROP_TRIG_REF(Signal* signal); - void runDropTrig(Signal* signal, Uint32 triggerId, Uint32 tableId); - Uint32 ptrI; union { Uint32 nextPool; Uint32 nextList; }; }; @@ -294,24 +256,11 @@ public: Uint32 m_subscriberRef; Uint32 m_subscriberData; Uint32 m_subPtrI; //reference to subscription - Uint32 m_firstGCI; // first GCI to send - Uint32 m_lastGCI; // last acnowledged GCI Uint32 nextList; union { Uint32 nextPool; Uint32 prevList; }; }; typedef Ptr<Subscriber> SubscriberPtr; - struct Bucket { - bool active; - bool handover; - bool handover_started; - Uint32 handoverGCI; - }; -#define NO_OF_BUCKETS 24 - struct Bucket c_buckets[NO_OF_BUCKETS]; - bool c_handoverToDo; - Uint32 c_lastCompleteGCI; - /** * */ @@ -336,25 +285,8 @@ public: DataBuffer<15>::DataBufferPool c_dataBufferPool; /** - * for restarting Suma not to start sending data too early - */ - bool c_restartLock; - - /** - * for flagging that a GCI containg inconsistent data - * typically due to node failiure - */ - - Uint32 c_lastInconsistentGCI; - Uint32 c_nodeFailGCI; - - NodeBitmask c_failedApiNodes; - - /** * Functions */ - bool removeSubscribersOnNode(Signal *signal, Uint32 nodeId); - bool parseTable(Signal* signal, class GetTabInfoConf* conf, Uint32 tableId, SyncRecord* syncPtr_p); bool checkTableTriggers(SegmentedSectionPtr ptr); @@ -365,52 +297,11 @@ public: void sendSubIdRef(Signal* signal, Uint32 errorCode); void sendSubCreateConf(Signal* signal, Uint32 sender, SubscriptionPtr subPtr); void sendSubCreateRef(Signal* signal, const SubCreateReq& req, Uint32 errorCode); - void sendSubStartRef(SubscriptionPtr subPtr, 
Signal* signal, - Uint32 errorCode, bool temporary = false); - void sendSubStartRef(Signal* signal, - Uint32 errorCode, bool temporary = false); - void sendSubStopRef(Signal* signal, - Uint32 errorCode, bool temporary = false); void sendSubSyncRef(Signal* signal, Uint32 errorCode); void sendSubRemoveRef(Signal* signal, const SubRemoveReq& ref, Uint32 errorCode, bool temporary = false); - void sendSubStartComplete(Signal*, SubscriberPtr, Uint32, - SubscriptionData::Part); - void sendSubStopComplete(Signal*, SubscriberPtr); - void sendSubStopReq(Signal* signal); - void completeSubRemoveReq(Signal* signal, SubscriptionPtr subPtr); - Uint32 getFirstGCI(Signal* signal); - Uint32 decideWhoToSend(Uint32 nBucket, Uint32 gci); - - virtual Uint32 getStoreBucket(Uint32 v) = 0; - virtual Uint32 getResponsibleSumaNodeId(Uint32 D) = 0; - virtual Uint32 RtoI(Uint32 sumaRef, bool dieOnNotFound = true) = 0; - - struct FailoverBuffer { - // FailoverBuffer(DataBuffer<15>::DataBufferPool & p); - FailoverBuffer(); - - bool subTableData(Uint32 gci, Uint32 *src, int sz); - bool subGcpCompleteRep(Uint32 gci); - bool nodeFailRep(); - - // typedef DataBuffer<15> GCIDataBuffer; - // GCIDataBuffer m_GCIDataBuffer; - // GCIDataBuffer::DataBufferIterator m_GCIDataBuffer_it; - - Uint32 *c_gcis; - int c_sz; - - // Uint32 *c_buf; - // int c_buf_sz; - - int c_first; - int c_next; - bool c_full; - } c_failoverBuffer; - /** * Table admin */ @@ -441,7 +332,7 @@ private: * Framework signals */ - void getNodeGroupMembers(Signal* signal); + void execREAD_CONFIG_REQ(Signal* signal); void execSTTOR(Signal* signal); void sendSTTORRY(Signal*); @@ -452,35 +343,13 @@ private: void execINCL_NODEREQ(Signal* signal); void execCONTINUEB(Signal* signal); void execSIGNAL_DROPPED_REP(Signal* signal); - void execAPI_FAILREQ(Signal* signal) ; - - void execSUB_GCP_COMPLETE_ACC(Signal* signal); /** * Controller interface */ - void execSUB_CREATE_REF(Signal* signal); - void execSUB_CREATE_CONF(Signal* signal); - - void 
execSUB_DROP_REF(Signal* signal); - void execSUB_DROP_CONF(Signal* signal); - - void execSUB_START_REF(Signal* signal); - void execSUB_START_CONF(Signal* signal); - - void execSUB_STOP_REF(Signal* signal); - void execSUB_STOP_CONF(Signal* signal); - - void execSUB_SYNC_REF(Signal* signal); - void execSUB_SYNC_CONF(Signal* signal); - void execSUB_ABORT_SYNC_REF(Signal* signal); void execSUB_ABORT_SYNC_CONF(Signal* signal); - void execSUMA_START_ME(Signal* signal); - void execSUMA_HANDOVER_REQ(Signal* signal); - void execSUMA_HANDOVER_CONF(Signal* signal); - /** * Subscription generation interface */ @@ -492,49 +361,6 @@ private: void execUTIL_SEQUENCE_REF(Signal* signal); void execCREATE_SUBID_REQ(Signal* signal); - Uint32 getStoreBucket(Uint32 v); - Uint32 getResponsibleSumaNodeId(Uint32 D); - - /** - * for Suma that is restarting another - */ - - struct Restart { - Restart(Suma& s); - - Suma & suma; - - bool c_okToStart[MAX_REPLICAS]; - bool c_waitingToStart[MAX_REPLICAS]; - - DLHashTable<SumaParticipant::Subscription>::Iterator c_subPtr; // TODO [MAX_REPLICAS] - SubscriberPtr c_subbPtr; // TODO [MAX_REPLICAS] - - void progError(int line, int cause, const char * extra) { - suma.progError(line, cause, extra); - } - - void resetNode(Uint32 sumaRef); - void runSUMA_START_ME(Signal*, Uint32 sumaRef); - void startNode(Signal*, Uint32 sumaRef); - - void createSubscription(Signal* signal, Uint32 sumaRef); - void nextSubscription(Signal* signal, Uint32 sumaRef); - void completeSubscription(Signal* signal, Uint32 sumaRef); - - void startSync(Signal* signal, Uint32 sumaRef); - void nextSync(Signal* signal, Uint32 sumaRef); - void completeSync(Signal* signal, Uint32 sumaRef); - - void sendSubStartReq(SubscriptionPtr subPtr, SubscriberPtr subbPtr, - Signal* signal, Uint32 sumaRef); - void startSubscriber(Signal* signal, Uint32 sumaRef); - void nextSubscriber(Signal* signal, Uint32 sumaRef); - void completeSubscriber(Signal* signal, Uint32 sumaRef); - - void 
completeRestartingNode(Signal* signal, Uint32 sumaRef); - } Restart; - private: friend class Restart; struct SubCoordinator { @@ -588,14 +414,4 @@ private: DLList<SubCoordinator> c_runningSubscriptions; }; -inline Uint32 -Suma::RtoI(Uint32 sumaRef, bool dieOnNotFound) { - for (Uint32 i = 0; i < c_noNodesInGroup; i++) { - if (sumaRef == calcSumaBlockRef(c_nodesInGroup[i])) - return i; - } - ndbrequire(!dieOnNotFound); - return RNIL; -} - #endif diff --git a/ndb/src/kernel/blocks/suma/SumaInit.cpp b/ndb/src/kernel/blocks/suma/SumaInit.cpp index b5945db3811..ae7425da4bf 100644 --- a/ndb/src/kernel/blocks/suma/SumaInit.cpp +++ b/ndb/src/kernel/blocks/suma/SumaInit.cpp @@ -35,19 +35,11 @@ SumaParticipant::SumaParticipant(const Configuration & conf) : */ addRecSignal(GSN_SUB_CREATE_REQ, &SumaParticipant::execSUB_CREATE_REQ); addRecSignal(GSN_SUB_REMOVE_REQ, &SumaParticipant::execSUB_REMOVE_REQ); - addRecSignal(GSN_SUB_START_REQ, &SumaParticipant::execSUB_START_REQ); - addRecSignal(GSN_SUB_STOP_REQ, &SumaParticipant::execSUB_STOP_REQ); addRecSignal(GSN_SUB_SYNC_REQ, &SumaParticipant::execSUB_SYNC_REQ); - addRecSignal(GSN_SUB_STOP_CONF, &SumaParticipant::execSUB_STOP_CONF); - addRecSignal(GSN_SUB_STOP_REF, &SumaParticipant::execSUB_STOP_REF); - /** * Dict interface */ - //addRecSignal(GSN_LIST_TABLES_REF, &SumaParticipant::execLIST_TABLES_REF); - addRecSignal(GSN_LIST_TABLES_CONF, &SumaParticipant::execLIST_TABLES_CONF); - //addRecSignal(GSN_GET_TABINFOREF, &SumaParticipant::execGET_TABINFO_REF); addRecSignal(GSN_GET_TABINFO_CONF, &SumaParticipant::execGET_TABINFO_CONF); addRecSignal(GSN_GET_TABINFOREF, &SumaParticipant::execGET_TABINFOREF); #if 0 @@ -76,60 +68,6 @@ SumaParticipant::SumaParticipant(const Configuration & conf) : addRecSignal(GSN_SUB_SYNC_CONTINUE_CONF, &SumaParticipant::execSUB_SYNC_CONTINUE_CONF); - /** - * Trigger stuff - */ - addRecSignal(GSN_TRIG_ATTRINFO, &SumaParticipant::execTRIG_ATTRINFO); - addRecSignal(GSN_FIRE_TRIG_ORD, 
&SumaParticipant::execFIRE_TRIG_ORD); - - addRecSignal(GSN_CREATE_TRIG_REF, &Suma::execCREATE_TRIG_REF); - addRecSignal(GSN_CREATE_TRIG_CONF, &Suma::execCREATE_TRIG_CONF); - addRecSignal(GSN_DROP_TRIG_REF, &Suma::execDROP_TRIG_REF); - addRecSignal(GSN_DROP_TRIG_CONF, &Suma::execDROP_TRIG_CONF); - - addRecSignal(GSN_SUB_GCP_COMPLETE_REP, - &SumaParticipant::execSUB_GCP_COMPLETE_REP); - - /** - * @todo: fix pool sizes - */ - Uint32 noTables; - const ndb_mgm_configuration_iterator * p = conf.getOwnConfigIterator(); - ndbrequire(p != 0); - - ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES, - &noTables); - - c_tablePool_.setSize(noTables); - c_tables.setSize(noTables); - - c_subscriptions.setSize(20); //10 - c_subscriberPool.setSize(64); - - c_subscriptionPool.setSize(64); //2 - c_syncPool.setSize(20); //2 - c_dataBufferPool.setSize(128); - - { - SLList<SyncRecord> tmp(c_syncPool); - Ptr<SyncRecord> ptr; - while(tmp.seize(ptr)) - new (ptr.p) SyncRecord(* this, c_dataBufferPool); - tmp.release(); - } - - for( int i = 0; i < NO_OF_BUCKETS; i++) { - c_buckets[i].active = false; - c_buckets[i].handover = false; - c_buckets[i].handover_started = false; - c_buckets[i].handoverGCI = 0; - } - c_handoverToDo = false; - c_lastInconsistentGCI = RNIL; - c_lastCompleteGCI = RNIL; - c_nodeFailGCI = 0; - - c_failedApiNodes.clear(); } SumaParticipant::~SumaParticipant() @@ -138,49 +76,21 @@ SumaParticipant::~SumaParticipant() Suma::Suma(const Configuration & conf) : SumaParticipant(conf), - Restart(*this), c_nodes(c_nodePool), c_runningSubscriptions(c_subCoordinatorPool) { - - c_nodePool.setSize(MAX_NDB_NODES); - c_masterNodeId = getOwnNodeId(); - - c_nodeGroup = c_noNodesInGroup = c_idInNodeGroup = 0; - for (int i = 0; i < MAX_REPLICAS; i++) { - c_nodesInGroup[i] = 0; - } - - c_subCoordinatorPool.setSize(10); - // Add received signals + addRecSignal(GSN_READ_CONFIG_REQ, &Suma::execREAD_CONFIG_REQ); addRecSignal(GSN_STTOR, &Suma::execSTTOR); addRecSignal(GSN_NDB_STTOR, 
&Suma::execNDB_STTOR); addRecSignal(GSN_DUMP_STATE_ORD, &Suma::execDUMP_STATE_ORD); addRecSignal(GSN_READ_NODESCONF, &Suma::execREAD_NODESCONF); - addRecSignal(GSN_API_FAILREQ, &Suma::execAPI_FAILREQ); - addRecSignal(GSN_NODE_FAILREP, &Suma::execNODE_FAILREP); - addRecSignal(GSN_INCL_NODEREQ, &Suma::execINCL_NODEREQ); addRecSignal(GSN_CONTINUEB, &Suma::execCONTINUEB); addRecSignal(GSN_SIGNAL_DROPPED_REP, &Suma::execSIGNAL_DROPPED_REP, true); addRecSignal(GSN_UTIL_SEQUENCE_CONF, &Suma::execUTIL_SEQUENCE_CONF); addRecSignal(GSN_UTIL_SEQUENCE_REF, &Suma::execUTIL_SEQUENCE_REF); addRecSignal(GSN_CREATE_SUBID_REQ, &Suma::execCREATE_SUBID_REQ); - - addRecSignal(GSN_SUB_CREATE_CONF, &Suma::execSUB_CREATE_CONF); - addRecSignal(GSN_SUB_CREATE_REF, &Suma::execSUB_CREATE_REF); - addRecSignal(GSN_SUB_SYNC_CONF, &Suma::execSUB_SYNC_CONF); - addRecSignal(GSN_SUB_SYNC_REF, &Suma::execSUB_SYNC_REF); - addRecSignal(GSN_SUB_START_CONF, &Suma::execSUB_START_CONF); - addRecSignal(GSN_SUB_START_REF, &Suma::execSUB_START_REF); - - addRecSignal(GSN_SUMA_START_ME, &Suma::execSUMA_START_ME); - addRecSignal(GSN_SUMA_HANDOVER_REQ, &Suma::execSUMA_HANDOVER_REQ); - addRecSignal(GSN_SUMA_HANDOVER_CONF, &Suma::execSUMA_HANDOVER_CONF); - - addRecSignal(GSN_SUB_GCP_COMPLETE_ACC, - &Suma::execSUB_GCP_COMPLETE_ACC); } Suma::~Suma() diff --git a/ndb/src/kernel/blocks/trix/Trix.cpp b/ndb/src/kernel/blocks/trix/Trix.cpp index cd11cb4d575..1d6e5adad62 100644 --- a/ndb/src/kernel/blocks/trix/Trix.cpp +++ b/ndb/src/kernel/blocks/trix/Trix.cpp @@ -52,6 +52,7 @@ Trix::Trix(const Configuration & conf) : BLOCK_CONSTRUCTOR(Trix); // Add received signals + addRecSignal(GSN_READ_CONFIG_REQ, &Trix::execREAD_CONFIG_REQ); addRecSignal(GSN_STTOR, &Trix::execSTTOR); addRecSignal(GSN_NDB_STTOR, &Trix::execNDB_STTOR); // Forwarded from DICT addRecSignal(GSN_READ_NODESCONF, &Trix::execREAD_NODESCONF); @@ -85,6 +86,28 @@ Trix::Trix(const Configuration & conf) : addRecSignal(GSN_SUB_SYNC_CONTINUE_REQ, 
&Trix::execSUB_SYNC_CONTINUE_REQ); addRecSignal(GSN_SUB_META_DATA, &Trix::execSUB_META_DATA); addRecSignal(GSN_SUB_TABLE_DATA, &Trix::execSUB_TABLE_DATA); +} + +/** + * + */ +Trix::~Trix() +{ +} + +void +Trix::execREAD_CONFIG_REQ(Signal* signal) +{ + jamEntry(); + + const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr(); + + Uint32 ref = req->senderRef; + Uint32 senderData = req->senderData; + + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); // Allocate pool sizes c_theAttrOrderBufferPool.setSize(100); @@ -96,13 +119,12 @@ Trix::Trix(const Configuration & conf) : new (subptr.p) SubscriptionRecord(c_theAttrOrderBufferPool); } subscriptions.release(); -} -/** - * - */ -Trix::~Trix() -{ + ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); + conf->senderRef = reference(); + conf->senderData = senderData; + sendSignal(ref, GSN_READ_CONFIG_CONF, signal, + ReadConfigConf::SignalLength, JBB); } /** diff --git a/ndb/src/kernel/blocks/trix/Trix.hpp b/ndb/src/kernel/blocks/trix/Trix.hpp index 8dc01375fa1..78c5b8b35c3 100644 --- a/ndb/src/kernel/blocks/trix/Trix.hpp +++ b/ndb/src/kernel/blocks/trix/Trix.hpp @@ -139,6 +139,7 @@ private: ArrayList<SubscriptionRecord> c_theSubscriptions; // System start + void execREAD_CONFIG_REQ(Signal* signal); void execSTTOR(Signal* signal); void execNDB_STTOR(Signal* signal); diff --git a/ndb/src/kernel/error/Error.hpp b/ndb/src/kernel/error/Error.hpp deleted file mode 100644 index e19d6782793..00000000000 --- a/ndb/src/kernel/error/Error.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#ifndef ERROR_H -#define ERROR_H - -/** - * Errorcodes for NDB - * - * These errorcodes should be used whenever a condition - * is detected where it's necesssary to shutdown NDB. - * - * Example: When another node fails while a NDB node are performing - * a system restart the node should be shutdown. This - * is kind of an error but the cause of the error is known - * and a proper errormessage describing the problem should - * be printed in error.log. It's therefore important to use - * the proper errorcode. - * - * TODO: In the future the errorcodes should be classified - * - */ - -enum ErrorCategory -{ - warning, - ecError, - fatal, - assert -}; - -const int ERR_BASE = 1000; - -// Errorcodes for all blocks except filseystem -const int ERR_ERR_BASE = ERR_BASE + 1300; -const int ERR_ERROR_PRGERR = ERR_ERR_BASE+1; -const int ERR_NODE_NOT_IN_CONFIG = ERR_ERR_BASE+2; -const int ERR_SYSTEM_ERROR = ERR_ERR_BASE+3; -const int ERR_INDEX_NOTINRANGE = ERR_ERR_BASE+4; -const int ERR_ARBIT_SHUTDOWN = ERR_ERR_BASE+5; -const int ERR_POINTER_NOTINRANGE = ERR_ERR_BASE+6; -const int ERR_PROGRAMERROR = ERR_ERR_BASE+7; -const int ERR_SR_OTHERNODEFAILED = ERR_ERR_BASE+8; -const int ERR_NODE_NOT_DEAD = ERR_ERR_BASE+9; -const int ERR_SR_REDOLOG = ERR_ERR_BASE+10; -const int ERR_SR_RESTARTCONFLICT = ERR_ERR_BASE+11; -const int ERR_NO_MORE_UNDOLOG = ERR_ERR_BASE+12; -const int ERR_SR_UNDOLOG = ERR_ERR_BASE+13; -const int ERR_MEMALLOC = ERR_ERR_BASE+27; -const int BLOCK_ERROR_JBUFCONGESTION = ERR_ERR_BASE+34; -const int ERROR_TIME_QUEUE_SHORT 
= ERR_ERR_BASE+35; -const int ERROR_TIME_QUEUE_LONG = ERR_ERR_BASE+36; -const int ERROR_TIME_QUEUE_DELAY = ERR_ERR_BASE+37; -const int ERROR_TIME_QUEUE_INDEX = ERR_ERR_BASE+38; -const int BLOCK_ERROR_BNR_ZERO = ERR_ERR_BASE+39; -const int ERROR_WRONG_PRIO_LEVEL = ERR_ERR_BASE+40; -const int ERR_NDBREQUIRE = ERR_ERR_BASE+41; -const int ERR_ERROR_INSERT = ERR_ERR_BASE+42; -const int ERR_INVALID_CONFIG = ERR_ERR_BASE+50; -const int ERR_OUT_OF_LONG_SIGNAL_MEMORY = ERR_ERR_BASE+51; - -// Errorcodes for NDB filesystem -const int AFS_ERR_BASE = ERR_BASE + 1800; -const int AFS_ERROR_NOPATH = AFS_ERR_BASE+1; -const int AFS_ERROR_CHANNALFULL = AFS_ERR_BASE+2; -const int AFS_ERROR_NOMORETHREADS = AFS_ERR_BASE+3; -const int AFS_ERROR_PARAMETER = AFS_ERR_BASE+4; -const int AFS_ERROR_INVALIDPATH = AFS_ERR_BASE+5; -const int AFS_ERROR_MAXOPEN = AFS_ERR_BASE+6; -const int AFS_ERROR_ALLREADY_OPEN = AFS_ERR_BASE+7; - -#endif // ERROR_H diff --git a/ndb/src/kernel/error/ErrorHandlingMacros.hpp b/ndb/src/kernel/error/ErrorHandlingMacros.hpp index d8bb7ff759b..8c3454b1ba1 100644 --- a/ndb/src/kernel/error/ErrorHandlingMacros.hpp +++ b/ndb/src/kernel/error/ErrorHandlingMacros.hpp @@ -17,22 +17,27 @@ #ifndef ERRORHANDLINGMACROS_H #define ERRORHANDLINGMACROS_H +#include <ndbd_exit_codes.h> #include "ErrorReporter.hpp" -#include "Error.hpp" extern const char programName[]; -#define ERROR_SET_SIGNAL(messageCategory, messageID, problemData, objectRef) \ - ErrorReporter::handleError(messageCategory, messageID, problemData, objectRef, NST_ErrorHandlerSignal) -#define ERROR_SET(messageCategory, messageID, problemData, objectRef) \ - ErrorReporter::handleError(messageCategory, messageID, problemData, objectRef) +enum NotUsed +{ + warning, + ecError, + fatal, + assert +}; + +#define ERROR_SET_SIGNAL(not_used, messageID, problemData, objectRef) \ + ErrorReporter::handleError(messageID, problemData, objectRef, NST_ErrorHandlerSignal) +#define ERROR_SET(not_used, messageID, problemData, objectRef) \ 
+ ErrorReporter::handleError(messageID, problemData, objectRef) // Description: // Call ErrorHandler with the supplied arguments. The // ErrorHandler decides how to report the error. // Parameters: - // messageCategory IN A hint to the error handler how the - // error should be reported. Can be - // error, fatal (or warning, use WARNING_SET instead). // messageID IN Code identifying the error. If less // than 1000 a unix error is assumed. If // greater than 1000 the code is treated diff --git a/ndb/src/kernel/error/ErrorMessages.cpp b/ndb/src/kernel/error/ErrorMessages.cpp deleted file mode 100644 index 059aa4af61c..00000000000 --- a/ndb/src/kernel/error/ErrorMessages.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include "ErrorMessages.hpp" - -struct ErrStruct { - int fauldId; - const char* text; -}; - -const ErrStruct errArray[] = { - - {2301, "Assertion, probably a programming error"}, - {2302, "Own Node Id not a NDB node, configuration error"}, - {2303, "System error"}, - {2304, "Index too large"}, - {2305, "Arbitrator shutdown"}, - {2306, "Pointer too large"}, - {2307, "Internal program error"}, - {2308, "Node failed during system restart"}, - {2309, "Node state conflict"}, - {2310, "Error while reading the REDO log"}, - {2311, "Conflict when selecting restart type"}, - {2312, "No more free UNDO log"}, - {2313, "Error while reading the datapages and UNDO log"}, - {2327, "Memory allocation failure"}, - {2334, "Job buffer congestion"}, - {2335, "Error in short time queue"}, - {2336, "Error in long time queue"}, - {2337, "Error in time queue, too long delay"}, - {2338, "Time queue index out of range"}, - {2339, "Send signal error"}, - {2340, "Wrong prio level when sending signal"}, - {2341, "Internal program error (failed ndbrequire)"}, - {2342, "Error insert executed" }, - {2350, "Invalid Configuration fetched from Management Server" }, - - // Ndbfs error messages - {2801, "No file system path"}, - {2802, "Channel is full"}, - {2803, "No more threads"}, - {2804, "Bad parameter"}, - {2805, "Illegal file system path"}, - {2806, "Max number of open files exceeded"}, - {2807, "File has already been opened"}, - - // Sentinel - {0, "No message slogan found"} - -}; - -const unsigned short NO_OF_ERROR_MESSAGES = sizeof(errArray)/sizeof(ErrStruct); - -const char* lookupErrorMessage(int faultId) -{ - int i = 0; - while (errArray[i].fauldId != faultId && errArray[i].fauldId != 0) - i++; - return errArray[i].text; -} - - diff --git a/ndb/src/kernel/error/ErrorReporter.cpp 
b/ndb/src/kernel/error/ErrorReporter.cpp index 25409db48a8..6c8bb1fe615 100644 --- a/ndb/src/kernel/error/ErrorReporter.cpp +++ b/ndb/src/kernel/error/ErrorReporter.cpp @@ -17,9 +17,8 @@ #include <ndb_global.h> -#include "Error.hpp" +#include <ndbd_exit_codes.h> #include "ErrorReporter.hpp" -#include "ErrorMessages.hpp" #include <FastScheduler.hpp> #include <DebuggerNames.hpp> @@ -29,17 +28,9 @@ #include <NdbAutoPtr.hpp> -#define MESSAGE_LENGTH 400 +#define MESSAGE_LENGTH 500 -const char* errorType[] = { - "warning", - "error", - "fatal", - "assert" -}; - - -static int WriteMessage(ErrorCategory thrdType, int thrdMessageID, +static int WriteMessage(int thrdMessageID, const char* thrdProblemData, const char* thrdObjRef, Uint32 thrdTheEmulatedJamIndex, @@ -116,24 +107,35 @@ ErrorReporter::get_trace_no(){ void -ErrorReporter::formatMessage(ErrorCategory type, - int faultID, +ErrorReporter::formatMessage(int faultID, const char* problemData, const char* objRef, const char* theNameOfTheTraceFile, char* messptr){ int processId; - + ndbd_exit_classification cl; + ndbd_exit_status st; + const char *exit_msg = ndbd_exit_message(faultID, &cl); + const char *exit_cl_msg = ndbd_exit_classification_message(cl, &st); + const char *exit_st_msg = ndbd_exit_status_message(st); + processId = NdbHost_GetProcessId(); BaseString::snprintf(messptr, MESSAGE_LENGTH, - "Date/Time: %s\nType of error: %s\n" - "Message: %s\nFault ID: %d\nProblem data: %s" - "\nObject of reference: %s\nProgramName: %s\n" - "ProcessID: %d\nTraceFile: %s\n%s\n***EOM***\n", + "Time: %s\n" + "Status: %s\n" + "Message: %s (%s)\n" + "Error: %d\n" + "Error data: %s\n" + "Error object: %s\n" + "Program: %s\n" + "Pid: %d\n" + "Trace: %s\n" + "Version: %s\n" + "***EOM***\n", formatTimeStampString() , - errorType[type], - lookupErrorMessage(faultID), + exit_st_msg, + exit_msg, exit_cl_msg, faultID, (problemData == NULL) ? 
"" : problemData, objRef, @@ -160,8 +162,10 @@ ErrorReporter::setErrorHandlerShutdownType(NdbShutdownType nst) s_errorHandlerShutdownType = nst; } +void childReportError(int error); + void -ErrorReporter::handleAssert(const char* message, const char* file, int line) +ErrorReporter::handleAssert(const char* message, const char* file, int line, int ec) { char refMessage[100]; @@ -175,38 +179,26 @@ ErrorReporter::handleAssert(const char* message, const char* file, int line) BaseString::snprintf(refMessage, 100, "%s line: %d (block: %s)", file, line, blockName); #endif - WriteMessage(assert, ERR_ERROR_PRGERR, message, refMessage, + WriteMessage(ec, message, refMessage, theEmulatedJamIndex, theEmulatedJam); - NdbShutdown(s_errorHandlerShutdownType); -} + childReportError(ec); -void -ErrorReporter::handleThreadAssert(const char* message, - const char* file, - int line) -{ - char refMessage[100]; - BaseString::snprintf(refMessage, 100, "file: %s lineNo: %d - %s", - file, line, message); - NdbShutdown(s_errorHandlerShutdownType); -}//ErrorReporter::handleThreadAssert() - +} void -ErrorReporter::handleError(ErrorCategory type, int messageID, +ErrorReporter::handleError(int messageID, const char* problemData, const char* objRef, NdbShutdownType nst) { - type = ecError; - // The value for type is not always set correctly in the calling function. - // So, to correct this, we set it set it to the value corresponding to - // the function that is called. 
- WriteMessage(type, messageID, problemData, + WriteMessage(messageID, problemData, objRef, theEmulatedJamIndex, theEmulatedJam); - if(messageID == ERR_ERROR_INSERT){ + + childReportError(messageID); + + if(messageID == NDBD_EXIT_ERROR_INSERT){ NdbShutdown(NST_ErrorInsert); } else { if (nst == NST_ErrorHandler) @@ -216,7 +208,7 @@ ErrorReporter::handleError(ErrorCategory type, int messageID, } int -WriteMessage(ErrorCategory thrdType, int thrdMessageID, +WriteMessage(int thrdMessageID, const char* thrdProblemData, const char* thrdObjRef, Uint32 thrdTheEmulatedJamIndex, Uint8 thrdTheEmulatedJam[]){ @@ -257,7 +249,7 @@ WriteMessage(ErrorCategory thrdType, int thrdMessageID, " \n\n\n"); // ...and write the error-message... - ErrorReporter::formatMessage(thrdType, thrdMessageID, + ErrorReporter::formatMessage(thrdMessageID, thrdProblemData, thrdObjRef, theTraceFileName, theMessage); fprintf(stream, "%s", theMessage); @@ -284,7 +276,7 @@ WriteMessage(ErrorCategory thrdType, int thrdMessageID, fseek(stream, offset, SEEK_SET); // ...and write the error-message there... 
- ErrorReporter::formatMessage(thrdType, thrdMessageID, + ErrorReporter::formatMessage(thrdMessageID, thrdProblemData, thrdObjRef, theTraceFileName, theMessage); fprintf(stream, "%s", theMessage); diff --git a/ndb/src/kernel/error/ErrorReporter.hpp b/ndb/src/kernel/error/ErrorReporter.hpp index c5533df46f4..0ec84190238 100644 --- a/ndb/src/kernel/error/ErrorReporter.hpp +++ b/ndb/src/kernel/error/ErrorReporter.hpp @@ -18,9 +18,9 @@ #define ERRORREPORTER_H #include <ndb_global.h> +#include <ndbd_exit_codes.h> #include "TimeModule.hpp" -#include "Error.hpp" #include <Emulator.hpp> class ErrorReporter @@ -29,25 +29,18 @@ public: static void setErrorHandlerShutdownType(NdbShutdownType nst = NST_ErrorHandler); static void handleAssert(const char* message, const char* file, - int line); + int line, int ec = NDBD_EXIT_PRGERR); - static void handleThreadAssert(const char* message, - const char* file, - int line); - - static void handleError(ErrorCategory type, - int faultID, + static void handleError(int faultID, const char* problemData, const char* objRef, enum NdbShutdownType = NST_ErrorHandler); - static void handleWarning(ErrorCategory type, - int faultID, + static void handleWarning(int faultID, const char* problemData, const char* objRef); - static void formatMessage(ErrorCategory type, - int faultID, + static void formatMessage(int faultID, const char* problemData, const char* objRef, const char* theNameOfTheTraceFile, diff --git a/ndb/src/kernel/error/Makefile.am b/ndb/src/kernel/error/Makefile.am index 54f3de2d76d..c58cdf80940 100644 --- a/ndb/src/kernel/error/Makefile.am +++ b/ndb/src/kernel/error/Makefile.am @@ -2,7 +2,7 @@ noinst_LIBRARIES = liberror.a liberror_a_SOURCES = TimeModule.cpp \ ErrorReporter.cpp \ - ErrorMessages.cpp + ndbd_exit_codes.c include $(top_srcdir)/ndb/config/common.mk.am include $(top_srcdir)/ndb/config/type_kernel.mk.am diff --git a/ndb/src/kernel/error/ndbd_exit_codes.c b/ndb/src/kernel/error/ndbd_exit_codes.c new file mode 100644 index 
00000000000..07b276346a0 --- /dev/null +++ b/ndb/src/kernel/error/ndbd_exit_codes.c @@ -0,0 +1,261 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <ndbd_exit_codes.h> + +typedef struct ErrStruct { + int faultId; + ndbd_exit_classification classification; + const char* text; +} ErrStruct; + +/** + * Shorter names in table below + */ + +#define XST_S ndbd_exit_st_success +#define XST_U ndbd_exit_st_unknown +#define XST_P ndbd_exit_st_permanent +#define XST_R ndbd_exit_st_temporary +#define XST_I ndbd_exit_st_filesystem_error + +#define XNE ndbd_exit_cl_none +#define XUE ndbd_exit_cl_unknown +#define XIE ndbd_exit_cl_internal_error +#define XCE ndbd_exit_cl_configuration_error +#define XAE ndbd_exit_cl_arbitration_error +#define XRE ndbd_exit_cl_restart_error +#define XCR ndbd_exit_cl_resource_configuration_error +#define XFF ndbd_exit_cl_filesystem_full_error +#define XFI ndbd_exit_cl_filesystem_inconsistency_error +#define XFL ndbd_exit_cl_filesystem_limit + +static const ErrStruct errArray[] = +{ + {NDBD_EXIT_PRGERR, XIE, "Assertion"}, + {NDBD_EXIT_NODE_NOT_IN_CONFIG, XCE, + "node id in the configuration has the wrong type, (i.e. 
not an NDB node)"}, + {NDBD_EXIT_SYSTEM_ERROR, XIE, + "System error, node killed during node restart by other node"}, + {NDBD_EXIT_INDEX_NOTINRANGE, XIE, "Array index out of range"}, + {NDBD_EXIT_ARBIT_SHUTDOWN, XAE, "Node lost connection to other nodes and " + "can not form a unpartitioned cluster, please investigate if there are " + "error(s) on other node(s)"}, + {NDBD_EXIT_PARTITIONED_SHUTDOWN, XAE, "Partitioned cluster detected. " + "Please check if cluster is already running"}, + {NDBD_EXIT_POINTER_NOTINRANGE, XIE, "Pointer too large"}, + {NDBD_EXIT_SR_OTHERNODEFAILED, XRE, "Another node failed during system " + "restart, please investigate error(s) on other node(s)"}, + {NDBD_EXIT_NODE_NOT_DEAD, XRE, "Internal node state conflict, " + "most probably resolved by restarting node again"}, + {NDBD_EXIT_SR_REDOLOG, XFI, "Error while reading the REDO log"}, + /* Currently unused? */ + {2311, XIE, "Conflict when selecting restart type"}, + {NDBD_EXIT_NO_MORE_UNDOLOG, XCR, + "No more free UNDO log, increase UndoIndexBuffer"}, + {NDBD_EXIT_SR_UNDOLOG, XFI, + "Error while reading the datapages and UNDO log"}, + {NDBD_EXIT_MEMALLOC, XCE, "Memory allocation failure, " + "please decrease some configuration parameters"}, + {NDBD_EXIT_BLOCK_JBUFCONGESTION, XIE, "Job buffer congestion"}, + {NDBD_EXIT_TIME_QUEUE_SHORT, XIE, "Error in short time queue"}, + {NDBD_EXIT_TIME_QUEUE_LONG, XIE, "Error in long time queue"}, + {NDBD_EXIT_TIME_QUEUE_DELAY, XIE, "Error in time queue, too long delay"}, + {NDBD_EXIT_TIME_QUEUE_INDEX, XIE, "Time queue index out of range"}, + {NDBD_EXIT_BLOCK_BNR_ZERO, XIE, "Send signal error"}, + {NDBD_EXIT_WRONG_PRIO_LEVEL, XIE, "Wrong priority level when sending signal"}, + {NDBD_EXIT_NDBREQUIRE, XIE, "Internal program error (failed ndbrequire)"}, + {NDBD_EXIT_NDBASSERT, XIE, "Internal program error (failed ndbassert)"}, + {NDBD_EXIT_ERROR_INSERT, XNE, "Error insert executed" }, + /* this error message is complemented by additional info when generated 
*/ + {NDBD_EXIT_INVALID_CONFIG, XCE, + "Invalid configuration received from Management Server"}, + /* this error message is complemented by additional info when + generated, such as signal, and text + */ + {NDBD_EXIT_OS_SIGNAL_RECEIVED, XIE, "Error OS signal received"}, + + {NDBD_EXIT_SR_RESTARTCONFLICT, XRE, + "Partial system restart causing conflicting file systems"}, + + /* VM */ + {NDBD_EXIT_OUT_OF_LONG_SIGNAL_MEMORY, XCR, + "Signal lost, out of long signal memory, please increase LongMessageBuffer"}, + {NDBD_EXIT_WATCHDOG_TERMINATE, XIE, "WatchDog terminate, internal error " + "or massive overload on the machine running this node"}, + {NDBD_EXIT_SIGNAL_LOST_SEND_BUFFER_FULL, XCR, + "Signal lost, out of send buffer memory, please increase SendBufferMemory or lower the load"}, + {NDBD_EXIT_SIGNAL_LOST, XIE, "Signal lost (unknown reason)"}, + {NDBD_EXIT_ILLEGAL_SIGNAL, XIE, + "Illegal signal (version mismatch a possibility)"}, + {NDBD_EXIT_CONNECTION_SETUP_FAILED, XCE, "Connection setup failed"}, + + /* Ndbcntr */ + {NDBD_EXIT_RESTART_TIMEOUT, XCE, + "Total restart time too long, consider increasing StartFailureTimeout " + "or investigate error(s) on other node(s)"}, + {NDBD_EXIT_RESTART_DURING_SHUTDOWN, XRE, + "Node started while node shutdown in progress. 
" + "Please wait until shutdown complete before starting node"}, + + /* DIH */ + {NDBD_EXIT_MAX_CRASHED_REPLICAS, XFL, + "Too many crashed replicas (8 consecutive node restart failures)"}, + {NDBD_EXIT_MASTER_FAILURE_DURING_NR, XRE, + "Unhandled master failure during node restart"}, + {NDBD_EXIT_LOST_NODE_GROUP, XAE, + "All nodes in a node group are unavailable"}, + {NDBD_EXIT_NO_RESTORABLE_REPLICA, XFI, + "Unable to find a restorable replica"}, + + /* ACC */ + {NDBD_EXIT_SR_OUT_OF_INDEXMEMORY, XCR, + "Out of index memory during system restart, please increase IndexMemory"}, + + /* TUP */ + {NDBD_EXIT_SR_OUT_OF_DATAMEMORY, XCR, + "Out of data memory during system restart, please increase DataMemory"}, + + /* Ndbfs error messages */ + /* Most codes will have additional info, such as OS error code */ + {NDBD_EXIT_AFS_NOPATH, XIE, "No file system path"}, + {2802, XIE, "Channel is full"}, + {2803, XIE, "No more threads"}, + {NDBD_EXIT_AFS_PARAMETER, XIE, "Bad parameter"}, + {NDBD_EXIT_AFS_INVALIDPATH, XCE, "Illegal file system path"}, + {NDBD_EXIT_AFS_MAXOPEN, XCR, + "Max number of open files exceeded, please increase MaxNoOfOpenFiles"}, + {NDBD_EXIT_AFS_ALREADY_OPEN, XIE, "File has already been opened"}, + + {NDBD_EXIT_AFS_ENVIRONMENT , XIE, "Environment error using file"}, + {NDBD_EXIT_AFS_TEMP_NO_ACCESS , XIE, "Temporary on access to file"}, + {NDBD_EXIT_AFS_DISK_FULL , XFF, "The file system is full"}, + {NDBD_EXIT_AFS_PERMISSION_DENIED , XCE, "Received permission denied for file"}, + {NDBD_EXIT_AFS_INVALID_PARAM , XCE, "Invalid parameter for file"}, + {NDBD_EXIT_AFS_UNKNOWN , XIE, "Unknown file system error"}, + {NDBD_EXIT_AFS_NO_MORE_RESOURCES , XIE, + "System reports no more file system resources"}, + {NDBD_EXIT_AFS_NO_SUCH_FILE , XFI, "File not found"}, + {NDBD_EXIT_AFS_READ_UNDERFLOW , XFI, "Read underflow"}, + + /* Sentinel */ + {0, XUE, + "No message slogan found (please report a bug if you get this error code)"} +}; + +typedef struct StatusExitMessage { + 
ndbd_exit_status status; + const char * message; +} StatusExitMessage; + +typedef struct StatusExitClassification { + ndbd_exit_status status; + ndbd_exit_classification classification; + const char * message; +} StatusExitClassification; + +/** + * Mapping between classification and status + */ +static +const +StatusExitMessage StatusExitMessageMapping[] = { + { XST_S, "Success"}, + { XST_U ,"Unknown"}, + { XST_P, "Permanent error, external action needed"}, + { XST_R, "Temporary error, restart node"}, + { XST_I, "Ndbd file system error, restart node initial"} +}; + +static +const +int NbExitStatus = sizeof(StatusExitMessageMapping)/sizeof(StatusExitMessage); + +static +const +StatusExitClassification StatusExitClassificationMapping[] = { + { XST_S, XNE, "No error"}, + { XST_U, XUE, "Unknown"}, + { XST_R, XIE, "Internal error, programming error or missing error message, " + "please report a bug"}, + { XST_P, XCE, "Configuration error"}, + { XST_R, XAE, "Arbitration error"}, + { XST_R, XRE, "Restart error"}, + { XST_P, XCR, "Resource configuration error"}, + { XST_P, XFF, "File system full"}, + { XST_I, XFI, "Ndbd file system inconsistency error, please report a bug"}, + { XST_I, XFL, "Ndbd file system limit exceeded"} +}; + +static const int NbExitClassification = +sizeof(StatusExitClassificationMapping)/sizeof(StatusExitClassification); + +const char *ndbd_exit_message(int faultId, ndbd_exit_classification *cl) +{ + int i = 0; + while (errArray[i].faultId != faultId && errArray[i].faultId != 0) + i++; + *cl = errArray[i].classification; + return errArray[i].text; +} + +static const char* empty_xstring = ""; + +const +char *ndbd_exit_classification_message(ndbd_exit_classification classification, + ndbd_exit_status *status) +{ + int i; + for (i= 0; i < NbExitClassification; i++) + { + if (StatusExitClassificationMapping[i].classification == classification) + { + *status = StatusExitClassificationMapping[i].status; + return 
StatusExitClassificationMapping[i].message; + } + } + *status = XST_U; + return empty_xstring; +} + +const char *ndbd_exit_status_message(ndbd_exit_status status) +{ + int i; + for (i= 0; i < NbExitStatus; i++) + if (StatusExitMessageMapping[i].status == status) + return StatusExitMessageMapping[i].message; + return empty_xstring; +} + +int ndbd_exit_string(int err_no, char *str, unsigned int size) +{ + unsigned int len; + + ndbd_exit_classification cl; + ndbd_exit_status st; + const char *msg = ndbd_exit_message(err_no, &cl); + if (msg[0] != '\0') + { + const char *cl_msg = ndbd_exit_classification_message(cl, &st); + const char *st_msg = ndbd_exit_status_message(st); + + len = my_snprintf(str, size-1, "%s: %s: %s", msg, st_msg, cl_msg); + str[size-1]= '\0'; + + return len; + } + return -1; +} diff --git a/ndb/src/kernel/main.cpp b/ndb/src/kernel/main.cpp index d9953b920d2..649ae7cae3f 100644 --- a/ndb/src/kernel/main.cpp +++ b/ndb/src/kernel/main.cpp @@ -19,6 +19,7 @@ #include <ndb_version.h> #include "Configuration.hpp" +#include <ConfigRetriever.hpp> #include <TransporterRegistry.hpp> #include "vm/SimBlockList.hpp" @@ -36,6 +37,10 @@ #include <NdbAutoPtr.hpp> +#include <Properties.hpp> + +#include <mgmapi_debug.h> + #if defined NDB_SOLARIS // ok #include <sys/processor.h> // For system informatio #endif @@ -58,16 +63,183 @@ extern "C" void handler_sigusr1(int signum); // child signalling failed restart void systemInfo(const Configuration & conf, const LogLevel & ll); +// These are used already before fork if fetch_configuration() fails +// (e.g. Unable to alloc node id). Set them to something reasonable. 
+static FILE *child_info_file_r= stdin; +static FILE *child_info_file_w= stdout; + +static void writeChildInfo(const char *token, int val) +{ + fprintf(child_info_file_w, "%s=%d\n", token, val); + fflush(child_info_file_w); +} + +void childReportSignal(int signum) +{ + writeChildInfo("signal", signum); +} + +void childReportError(int error) +{ + writeChildInfo("error", error); +} + +void childExit(int code, Uint32 currentStartPhase) +{ + writeChildInfo("sphase", currentStartPhase); + writeChildInfo("exit", code); + fprintf(child_info_file_w, "\n"); + fclose(child_info_file_r); + fclose(child_info_file_w); + exit(code); +} + +void childAbort(int code, Uint32 currentStartPhase) +{ + writeChildInfo("sphase", currentStartPhase); + writeChildInfo("exit", code); + fprintf(child_info_file_w, "\n"); + fclose(child_info_file_r); + fclose(child_info_file_w); + signal(6, SIG_DFL); + abort(); +} + +static int insert(const char * pair, Properties & p) +{ + BaseString tmp(pair); + + tmp.trim(" \t\n\r"); + Vector<BaseString> split; + tmp.split(split, ":=", 2); + if(split.size() != 2) + return -1; + p.put(split[0].trim().c_str(), split[1].trim().c_str()); + return 0; +} + +static int readChildInfo(Properties &info) +{ + fclose(child_info_file_w); + char buf[128]; + while (fgets(buf,sizeof(buf),child_info_file_r)) + insert(buf,info); + fclose(child_info_file_r); + return 0; +} + +static bool get_int_property(Properties &info, + const char *token, Uint32 *int_val) +{ + const char *str_val= 0; + if (!info.get(token, &str_val)) + return false; + char *endptr; + long int tmp= strtol(str_val, &endptr, 10); + if (str_val == endptr) + return false; + *int_val = tmp; + return true; +} + +int reportShutdown(class Configuration *config, int error_exit, int restart) +{ + Uint32 error= 0, signum= 0, sphase= 256; + Properties info; + readChildInfo(info); + + get_int_property(info, "signal", &signum); + get_int_property(info, "error", &error); + get_int_property(info, "sphase", &sphase); + + 
Uint32 length, theData[25]; + EventReport *rep = (EventReport *)theData; + + rep->setNodeId(globalData.ownId); + if (restart) + theData[1] = 1 | + (globalData.theRestartFlag == initial_state ? 2 : 0) | + (config->getInitialStart() ? 4 : 0); + else + theData[1] = 0; + + if (error_exit == 0) + { + rep->setEventType(NDB_LE_NDBStopCompleted); + theData[2] = signum; + length = 3; + } + else + { + rep->setEventType(NDB_LE_NDBStopForced); + theData[2] = signum; + theData[3] = error; + theData[4] = sphase; + theData[5] = 0; // extra + length = 6; + } + + { // Log event + const EventReport * const eventReport = (EventReport *)&theData[0]; + g_eventLogger.log(eventReport->getEventType(), theData, + eventReport->getNodeId(), 0); + } + + for (unsigned n = 0; n < config->m_mgmds.size(); n++) + { + NdbMgmHandle h = ndb_mgm_create_handle(); + if (h == 0 || + ndb_mgm_set_connectstring(h, config->m_mgmds[n].c_str()) || + ndb_mgm_connect(h, + 1, //no_retries + 0, //retry_delay_in_seconds + 0 //verbose + )) + goto handle_error; + + { + if (ndb_mgm_report_event(h, theData, length)) + goto handle_error; + } + goto do_next; + +handle_error: + if (h) + { + BaseString tmp(ndb_mgm_get_latest_error_msg(h)); + tmp.append(" : "); + tmp.append(ndb_mgm_get_latest_error_desc(h)); + g_eventLogger.warning("Unable to report shutdown reason to %s: %s", + config->m_mgmds[n].c_str(), tmp.c_str()); + } + else + { + g_eventLogger.error("Unable to report shutdown reason to %s", + config->m_mgmds[n].c_str()); + } +do_next: + if (h) + { + ndb_mgm_disconnect(h); + ndb_mgm_destroy_handle(&h); + } + } + return 0; +} + int main(int argc, char** argv) { NDB_INIT(argv[0]); // Print to stdout/console g_eventLogger.createConsoleHandler(); g_eventLogger.setCategory("ndbd"); + g_eventLogger.enable(Logger::LL_ON, Logger::LL_INFO); g_eventLogger.enable(Logger::LL_ON, Logger::LL_CRITICAL); g_eventLogger.enable(Logger::LL_ON, Logger::LL_ERROR); g_eventLogger.enable(Logger::LL_ON, Logger::LL_WARNING); + 
g_eventLogger.m_logLevel.setLogLevel(LogLevel::llStartUp, 15); + globalEmulatorData.create(); // Parse command line options @@ -96,22 +268,60 @@ int main(int argc, char** argv) return 1; } } - + #ifndef NDB_WIN32 signal(SIGUSR1, handler_sigusr1); - for(pid_t child = fork(); child != 0; child = fork()){ + pid_t child = -1; + while (! theConfig->getForegroundMode()) // the cond is const + { + // setup reporting between child and parent + int filedes[2]; + if (pipe(filedes)) + { + g_eventLogger.error("pipe() failed with errno=%d (%s)", + errno, strerror(errno)); + return 1; + } + else + { + if (!(child_info_file_w= fdopen(filedes[1],"w"))) + { + g_eventLogger.error("fdopen() failed with errno=%d (%s)", + errno, strerror(errno)); + } + if (!(child_info_file_r= fdopen(filedes[0],"r"))) + { + g_eventLogger.error("fdopen() failed with errno=%d (%s)", + errno, strerror(errno)); + } + } + + if ((child = fork()) <= 0) + break; // child or error + /** * Parent */ + catchsigs(true); - int status = 0; + /** + * We no longer need the mgm connection in this process + * (as we are the angel, not ndb) + * + * We don't want to purge any allocated resources (nodeid), so + * we set that option to false + */ + theConfig->closeConfiguration(false); + + int status = 0, error_exit = 0, signum = 0; while(waitpid(child, &status, 0) != child); if(WIFEXITED(status)){ switch(WEXITSTATUS(status)){ case NRT_Default: g_eventLogger.info("Angel shutting down"); + reportShutdown(theConfig, 0, 0); exit(0); break; case NRT_NoStart_Restart: @@ -127,10 +337,12 @@ int main(int argc, char** argv) globalData.theRestartFlag = perform_start; break; default: + error_exit = 1; if(theConfig->stopOnError()){ /** * Error shutdown && stopOnError() */ + reportShutdown(theConfig, error_exit, 0); exit(0); } // Fall-through @@ -139,12 +351,27 @@ int main(int argc, char** argv) globalData.theRestartFlag = perform_start; break; } - } else if(theConfig->stopOnError()){ - /** - * Error shutdown && stopOnError() - */ - 
exit(0); + } else { + error_exit = 1; + if (WIFSIGNALED(status)) + { + signum = WTERMSIG(status); + childReportSignal(signum); + } + else + { + signum = 127; + g_eventLogger.info("Unknown exit reason. Stopped."); + } + if(theConfig->stopOnError()){ + /** + * Error shutdown && stopOnError() + */ + reportShutdown(theConfig, error_exit, 0); + exit(0); + } } + if (!failed_startup_flag) { // Reset the counter for consecutive failed startups @@ -155,15 +382,23 @@ int main(int argc, char** argv) /** * Error shutdown && stopOnError() */ - g_eventLogger.alert("Ndbd has failed %u consecutive startups. Not restarting", failed_startups); + g_eventLogger.alert("Ndbd has failed %u consecutive startups. " + "Not restarting", failed_startups); + reportShutdown(theConfig, error_exit, 0); exit(0); } failed_startup_flag = false; + reportShutdown(theConfig, error_exit, 1); g_eventLogger.info("Ndb has terminated (pid %d) restarting", child); theConfig->fetch_configuration(); } - g_eventLogger.info("Angel pid: %d ndb pid: %d", getppid(), getpid()); + if (child >= 0) + g_eventLogger.info("Angel pid: %d ndb pid: %d", getppid(), getpid()); + else if (child > 0) + g_eventLogger.info("Ndb pid: %d", getpid()); + else + g_eventLogger.info("Ndb started in foreground"); #else g_eventLogger.info("Ndb started"); #endif @@ -185,6 +420,10 @@ int main(int argc, char** argv) FILE * signalLog = fopen(buf, "a"); globalSignalLoggers.setOwnNodeId(globalData.ownId); globalSignalLoggers.setOutputStream(signalLog); +#if 0 // to log startup + globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH"); + globalData.testOn = 1; +#endif #endif catchsigs(false); @@ -214,6 +453,13 @@ int main(int argc, char** argv) exit(-1); } + // Re-use the mgm handle as a transporter + if(!globalTransporterRegistry.connect_client( + theConfig->get_config_retriever()->get_mgmHandlePtr())) + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, + "Connection to mgmd terminated before setup was complete", + "StopOnError 
missing"); + if (!globalTransporterRegistry.start_clients()){ ndbout_c("globalTransporterRegistry.start_clients() failed"); exit(-1); @@ -331,10 +577,7 @@ catchsigs(bool ignore){ #ifdef SIGPOLL SIGPOLL, #endif - SIGSEGV, -#ifdef SIGTRAP - SIGTRAP -#endif + SIGSEGV }; static const int signals_ignore[] = { @@ -348,6 +591,11 @@ catchsigs(bool ignore){ handler_register(signals_error[i], handler_error, ignore); for(i = 0; i < sizeof(signals_ignore)/sizeof(signals_ignore[0]); i++) handler_register(signals_ignore[i], SIG_IGN, ignore); +#ifdef SIGTRAP + Configuration* theConfig = globalEmulatorData.theConfiguration; + if (! theConfig->getForegroundMode()) + handler_register(SIGTRAP, handler_error, ignore); +#endif #endif } @@ -355,6 +603,8 @@ extern "C" void handler_shutdown(int signum){ g_eventLogger.info("Received signal %d. Performing stop.", signum); + childReportError(0); + childReportSignal(signum); globalData.theRestartFlag = perform_stop; } @@ -379,10 +629,15 @@ handler_error(int signum){ NdbSleep_MilliSleep(10); thread_id= my_thread_id(); g_eventLogger.info("Received signal %d. Running error handler.", signum); + childReportSignal(signum); // restart the system - char errorData[40]; - BaseString::snprintf(errorData, 40, "Signal %d received", signum); - ERROR_SET_SIGNAL(fatal, 0, errorData, __FILE__); + char errorData[64], *info= 0; +#ifdef HAVE_STRSIGNAL + info= strsignal(signum); +#endif + BaseString::snprintf(errorData, sizeof(errorData), "Signal %d received; %s", signum, + info ? 
info : "No text for signal available"); + ERROR_SET_SIGNAL(fatal, NDBD_EXIT_OS_SIGNAL_RECEIVED, errorData, __FILE__); } extern "C" diff --git a/ndb/src/kernel/vm/ArrayPool.hpp b/ndb/src/kernel/vm/ArrayPool.hpp index 924ed51ee15..3b1264af8be 100644 --- a/ndb/src/kernel/vm/ArrayPool.hpp +++ b/ndb/src/kernel/vm/ArrayPool.hpp @@ -18,6 +18,7 @@ #define ARRAY_POOL_HPP #include <ndb_global.h> +#include "ndbd_malloc.hpp" #include <pc.hpp> #include <ErrorReporter.hpp> @@ -44,7 +45,7 @@ public: * * Note, can currently only be called once */ - bool setSize(Uint32 noOfElements); + bool setSize(Uint32 noOfElements, bool exit_on_error = true); inline Uint32 getNoOfFree() const { return noOfFree; @@ -201,7 +202,7 @@ template <class T> inline ArrayPool<T>::~ArrayPool(){ if(theArray != 0){ - NdbMem_Free(theArray); + ndbd_free(theArray, size * sizeof(T)); theArray = 0; #ifdef ARRAY_GUARD delete []theAllocatedBitmask; @@ -218,13 +219,19 @@ ArrayPool<T>::~ArrayPool(){ template <class T> inline bool -ArrayPool<T>::setSize(Uint32 noOfElements){ +ArrayPool<T>::setSize(Uint32 noOfElements, bool exit_on_error){ if(size == 0){ if(noOfElements == 0) return true; - theArray = (T *)NdbMem_Allocate(noOfElements * sizeof(T)); + theArray = (T *)ndbd_malloc(noOfElements * sizeof(T)); if(theArray == 0) - return false; + { + if (!exit_on_error) + return false; + ErrorReporter::handleAssert("ArrayPool<T>::setSize malloc failed", + __FILE__, __LINE__, NDBD_EXIT_MEMALLOC); + return false; // not reached + } size = noOfElements; noOfFree = noOfElements; @@ -247,7 +254,11 @@ ArrayPool<T>::setSize(Uint32 noOfElements){ return true; } - return false; + if (!exit_on_error) + return false; + + ErrorReporter::handleAssert("ArrayPool<T>::setSize called twice", __FILE__, __LINE__); + return false; // not reached } template <class T> diff --git a/ndb/src/kernel/vm/CArray.hpp b/ndb/src/kernel/vm/CArray.hpp index a6e84e2c041..93f75056b50 100644 --- a/ndb/src/kernel/vm/CArray.hpp +++ b/ndb/src/kernel/vm/CArray.hpp 
@@ -17,6 +17,8 @@ #ifndef CARRAY_HPP #define CARRAY_HPP +#include "ndbd_malloc.hpp" + /** * Template class used for implementing an c - array */ @@ -31,7 +33,7 @@ public: * * Note, can currently only be called once */ - bool setSize(Uint32 noOfElements); + bool setSize(Uint32 noOfElements, bool exit_on_error = true); /** * Get size @@ -69,7 +71,7 @@ template <class T> inline CArray<T>::~CArray(){ if(theArray != 0){ - NdbMem_Free(theArray); + ndbd_free(theArray, size * sizeof(T)); theArray = 0; } } @@ -82,13 +84,19 @@ CArray<T>::~CArray(){ template <class T> inline bool -CArray<T>::setSize(Uint32 noOfElements){ +CArray<T>::setSize(Uint32 noOfElements, bool exit_on_error){ if(size == noOfElements) return true; - theArray = (T *)NdbMem_Allocate(noOfElements * sizeof(T)); + theArray = (T *)ndbd_malloc(noOfElements * sizeof(T)); if(theArray == 0) - return false; + { + if (!exit_on_error) + return false; + ErrorReporter::handleAssert("CArray<T>::setSize malloc failed", + __FILE__, __LINE__, NDBD_EXIT_MEMALLOC); + return false; // not reached + } size = noOfElements; return true; } diff --git a/ndb/src/kernel/vm/ClusterConfiguration.cpp b/ndb/src/kernel/vm/ClusterConfiguration.cpp index d5bd03f69d5..813407b497e 100644 --- a/ndb/src/kernel/vm/ClusterConfiguration.cpp +++ b/ndb/src/kernel/vm/ClusterConfiguration.cpp @@ -359,12 +359,12 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ if(!db.get(tmp[i].attrib, tmp[i].storage)){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "%s not found", tmp[i].attrib); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } if(!p.get("NoOfNodes", &cd.SizeAltData.noOfNodes)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "NoOfNodes missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "NoOfNodes missing"); } Properties::Iterator it(&p); @@ -378,36 +378,36 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ const 
Properties * node; if(!p.get(name, &node)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data missing"); } if(!node->get("Id", &nodeId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Id) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Id) missing"); } if(!node->get("Type", &nodeType)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Type) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Type) missing"); } if(nodeId > MAX_NODES){ char buf[255]; snprintf(buf, sizeof(buf), "Maximum DB node id allowed is: %d", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(nodeId == 0){ char buf[255]; snprintf(buf, sizeof(buf), "Minimum node id allowed in the cluster is: 1"); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } for(unsigned j = 0; j<nodeNo; j++){ if(cd.nodeData[j].nodeId == nodeId){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "Two node can not have the same node id"); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } @@ -430,14 +430,14 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ if(nodeId > MAX_NDB_NODES){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "Maximum node id for a ndb node is: %d", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(cd.SizeAltData.noOfNDBNodes > MAX_NDB_NODES){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), "Maximum %d ndb nodes is allowed in the cluster", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } else if(strcmp("API", nodeType) == 0){ cd.nodeData[nodeNo].nodeType = NodeInfo::API; @@ -452,7 
+452,7 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ cd.SizeAltData.noOfMGMNodes++; // No of MGM processes tmpApiMgmProperties = "MGM"; } else { - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration: Unknown node type", nodeType); } @@ -462,7 +462,7 @@ void ClusterConfiguration::init(const Properties & p, const Properties & db){ const Properties* q = 0; if (!p.get(tmpApiMgmProperties, nodeId, &q)) { - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, tmpApiMgmProperties); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, tmpApiMgmProperties); } else { */ Uint32 rank = 0; diff --git a/ndb/src/kernel/vm/Configuration.cpp b/ndb/src/kernel/vm/Configuration.cpp index de78a4e927c..7d1a5ed2ff4 100644 --- a/ndb/src/kernel/vm/Configuration.cpp +++ b/ndb/src/kernel/vm/Configuration.cpp @@ -48,11 +48,21 @@ extern EventLogger g_eventLogger; enum ndbd_options { OPT_INITIAL = NDB_STD_OPTIONS_LAST, - OPT_NODAEMON + OPT_NODAEMON, + OPT_FOREGROUND, + OPT_NOWAIT_NODES, + OPT_INITIAL_START }; NDB_STD_OPTS_VARS; -static int _daemon, _no_daemon, _initial, _no_start; +// XXX should be my_bool ??? 
+static int _daemon, _no_daemon, _foreground, _initial, _no_start; +static int _initialstart; +static const char* _nowait_nodes; + +extern Uint32 g_start_type; +extern NdbNodeBitmask g_nowait_nodes; + /** * Arguments to NDB process */ @@ -75,6 +85,19 @@ static struct my_option my_long_options[] = "Do not start ndbd as daemon, provided for testing purposes", (gptr*) &_no_daemon, (gptr*) &_no_daemon, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "foreground", OPT_FOREGROUND, + "Run real ndbd in foreground, provided for debugging purposes" + " (implies --nodaemon)", + (gptr*) &_foreground, (gptr*) &_foreground, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "nowait-nodes", OPT_NOWAIT_NODES, + "Nodes that will not be waited for during start", + (gptr*) &_nowait_nodes, (gptr*) &_nowait_nodes, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + { "initial-start", OPT_INITIAL_START, + "Perform initial start", + (gptr*) &_initialstart, (gptr*) &_initialstart, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; static void short_usage_sub(void) @@ -88,13 +111,6 @@ static void usage() my_print_help(my_long_options); my_print_variables(my_long_options); } -static my_bool -get_one_option(int optid, const struct my_option *opt __attribute__((unused)), - char *argument) -{ - return ndb_std_get_one_option(optid, opt, - argument ? 
argument : "d:t:O,/tmp/ndbd.trace"); -} bool Configuration::init(int argc, char** argv) @@ -103,16 +119,21 @@ Configuration::init(int argc, char** argv) load_defaults("my",load_default_groups,&argc,&argv); int ho_error; - if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) +#ifndef DBUG_OFF + opt_debug= "d:t:O,/tmp/ndbd.trace"; +#endif + if ((ho_error=handle_options(&argc, &argv, my_long_options, + ndb_std_get_one_option))) exit(ho_error); - if (_no_daemon) { + if (_no_daemon || _foreground) { _daemon= 0; } DBUG_PRINT("info", ("no_start=%d", _no_start)); DBUG_PRINT("info", ("initial=%d", _initial)); DBUG_PRINT("info", ("daemon=%d", _daemon)); + DBUG_PRINT("info", ("foreground=%d", _foreground)); DBUG_PRINT("info", ("connect_str=%s", opt_connect_str)); ndbSetOwnVersion(); @@ -134,6 +155,8 @@ Configuration::init(int argc, char** argv) // Check daemon flag if (_daemon) _daemonMode = true; + if (_foreground) + _foregroundMode = true; // Save programname if(argc > 0 && argv[0] != 0) @@ -143,6 +166,37 @@ Configuration::init(int argc, char** argv) globalData.ownId= 0; + if (_nowait_nodes) + { + BaseString str(_nowait_nodes); + Vector<BaseString> arr; + str.split(arr, ","); + for (Uint32 i = 0; i<arr.size(); i++) + { + char *endptr = 0; + long val = strtol(arr[i].c_str(), &endptr, 10); + if (*endptr) + { + ndbout_c("Unable to parse nowait-nodes argument: %s : %s", + arr[i].c_str(), _nowait_nodes); + exit(-1); + } + if (! 
(val > 0 && val < MAX_NDB_NODES)) + { + ndbout_c("Invalid nodeid specified in nowait-nodes: %d : %s", + val, _nowait_nodes); + exit(-1); + } + g_nowait_nodes.set(val); + } + } + + if (_initialstart) + { + _initialStart = true; + g_start_type |= (1 << NodeState::ST_INITIAL_START); + } + return true; } @@ -154,11 +208,17 @@ Configuration::Configuration() _backupPath = 0; _initialStart = false; _daemonMode = false; + _foregroundMode = false; m_config_retriever= 0; m_clusterConfig= 0; + m_clusterConfigIter= 0; + m_logLevel= 0; } Configuration::~Configuration(){ + if (opt_connect_str) + free(_connectString); + if(_programName != NULL) free(_programName); @@ -171,10 +231,15 @@ Configuration::~Configuration(){ if (m_config_retriever) { delete m_config_retriever; } + + if(m_logLevel) { + delete m_logLevel; + } } void -Configuration::closeConfiguration(){ +Configuration::closeConfiguration(bool end_session){ + m_config_retriever->end_session(end_session); if (m_config_retriever) { delete m_config_retriever; } @@ -191,13 +256,12 @@ Configuration::fetch_configuration(){ } m_mgmd_port= 0; - m_mgmd_host= 0; m_config_retriever= new ConfigRetriever(getConnectString(), NDB_VERSION, NODE_TYPE_DB); if (m_config_retriever->hasError()) { - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Could not connect initialize handle to management server", m_config_retriever->getErrorString()); } @@ -209,11 +273,11 @@ Configuration::fetch_configuration(){ /* Set stop on error to true otherwise NDB will go into an restart loop... 
*/ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Could not connect to ndb_mgmd", s); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Could not connect to ndb_mgmd", s); } m_mgmd_port= m_config_retriever->get_mgmd_port(); - m_mgmd_host= m_config_retriever->get_mgmd_host(); + m_mgmd_host.assign(m_config_retriever->get_mgmd_host()); ConfigRetriever &cr= *m_config_retriever; @@ -224,10 +288,11 @@ Configuration::fetch_configuration(){ if (globalData.ownId) cr.setNodeId(globalData.ownId); - globalData.ownId = cr.allocNodeId(2 /*retry*/,3 /*delay*/); + globalData.ownId = cr.allocNodeId(globalData.ownId ? 10 : 2 /*retry*/, + 3 /*delay*/); if(globalData.ownId == 0){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Unable to alloc node id", m_config_retriever->getErrorString()); } @@ -241,7 +306,7 @@ Configuration::fetch_configuration(){ go into an restart loop... */ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Could not fetch configuration" + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Could not fetch configuration" "/invalid configuration", s); } if(m_clusterConfig) @@ -251,13 +316,36 @@ Configuration::fetch_configuration(){ ndb_mgm_configuration_iterator iter(* p, CFG_SECTION_NODE); if (iter.find(CFG_NODE_ID, globalData.ownId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); } if(iter.get(CFG_DB_STOP_ON_ERROR, &_stopOnError)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "StopOnError missing"); } + + m_mgmds.clear(); + for(ndb_mgm_first(&iter); ndb_mgm_valid(&iter); ndb_mgm_next(&iter)) + { + Uint32 nodeType, port; + char const *hostname; + + ndb_mgm_get_int_parameter(&iter,CFG_TYPE_OF_SECTION,&nodeType); + + if (nodeType != NodeInfo::MGM) + continue; + + if (ndb_mgm_get_string_parameter(&iter,CFG_NODE_HOST, 
&hostname) || + ndb_mgm_get_int_parameter(&iter,CFG_MGM_PORT, &port) || + hostname == 0 || hostname[0] == 0) + { + continue; + } + BaseString connectstring(hostname); + connectstring.appfmt(":%d", port); + + m_mgmds.push_back(connectstring); + } } static char * get_and_validate_path(ndb_mgm_configuration_iterator &iter, @@ -265,12 +353,12 @@ static char * get_and_validate_path(ndb_mgm_configuration_iterator &iter, { const char* path = NULL; if(iter.get(param, &path)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched missing ", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched missing ", param_string); } if(path == 0 || strlen(path) == 0){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched. Configuration does not contain valid ", param_string); } @@ -288,7 +376,7 @@ static char * get_and_validate_path(ndb_mgm_configuration_iterator &iter, (::access(buf2, W_OK) != 0)) #endif { - ERROR_SET(fatal, AFS_ERROR_INVALIDPATH, path, " Filename::init()"); + ERROR_SET(fatal, NDBD_EXIT_AFS_INVALIDPATH, path, param_string); } if (strcmp(&buf2[strlen(buf2) - 1], DIR_SEPARATOR)) @@ -312,7 +400,7 @@ Configuration::setupConfiguration(){ * p, globalTransporterRegistry); if(res <= 0){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "No transporters configured"); } } @@ -322,27 +410,27 @@ Configuration::setupConfiguration(){ */ ndb_mgm_configuration_iterator iter(* p, CFG_SECTION_NODE); if (iter.find(CFG_NODE_ID, globalData.ownId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "DB missing"); } unsigned type; if(!(iter.get(CFG_TYPE_OF_SECTION, &type) == 0 && type == NODE_TYPE_DB)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + 
ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "I'm wrong type of node"); } if(iter.get(CFG_DB_NO_SAVE_MSGS, &_maxErrorLogs)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "MaxNoOfSavedMessages missing"); } if(iter.get(CFG_DB_MEMLOCK, &_lockPagesInMainMemory)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "LockPagesInMainMemory missing"); } if(iter.get(CFG_DB_WATCHDOG_INTERVAL, &_timeBetweenWatchDogCheck)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "TimeBetweenWatchDogCheck missing"); } @@ -357,7 +445,7 @@ Configuration::setupConfiguration(){ _backupPath= get_and_validate_path(iter, CFG_DB_BACKUP_DATADIR, "BackupDataDir"); if(iter.get(CFG_DB_STOP_ON_ERROR_INSERT, &m_restartOnErrorInsert)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, "Invalid configuration fetched", + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", "RestartOnErrorInsert missing"); } @@ -372,6 +460,8 @@ Configuration::setupConfiguration(){ ConfigValues* cf = ConfigValuesFactory::extractCurrentSection(iter.m_config); + if(m_clusterConfigIter) + ndb_mgm_destroy_iterator(m_clusterConfigIter); m_clusterConfigIter = ndb_mgm_create_configuration_iterator (p, CFG_SECTION_NODE); @@ -497,7 +587,7 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ *tmp[i].storage = 0; } else { BaseString::snprintf(buf, sizeof(buf),"ConfigParam: %d not found", tmp[i].paramId); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } } } @@ -507,12 +597,12 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ ndb_mgm_get_int64_parameter(&db, CFG_DB_INDEX_MEM, &indexMem); if(dataMem == 0){ 
BaseString::snprintf(buf, sizeof(buf), "ConfigParam: %d not found", CFG_DB_DATA_MEM); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(indexMem == 0){ BaseString::snprintf(buf, sizeof(buf), "ConfigParam: %d not found", CFG_DB_INDEX_MEM); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } noOfDataPages = (dataMem / 32768); @@ -536,23 +626,23 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ Uint32 nodeType; if(ndb_mgm_get_int_parameter(p, CFG_NODE_ID, &nodeId)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Id) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Id) missing"); } if(ndb_mgm_get_int_parameter(p, CFG_TYPE_OF_SECTION, &nodeType)){ - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, "Node data (Type) missing"); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, "Node data (Type) missing"); } if(nodeId > MAX_NODES || nodeId == 0){ BaseString::snprintf(buf, sizeof(buf), "Invalid node id: %d", nodeId); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } if(nodes.get(nodeId)){ BaseString::snprintf(buf, sizeof(buf), "Two node can not have the same node id: %d", nodeId); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } nodes.set(nodeId); @@ -563,7 +653,7 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ if(nodeId > MAX_NDB_NODES){ BaseString::snprintf(buf, sizeof(buf), "Maximum node id for a ndb node is: %d", MAX_NDB_NODES); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, msg, buf); } break; case NODE_TYPE_API: @@ -578,7 +668,7 @@ Configuration::calcSizeAlt(ConfigValues * ownConfig){ break; default: BaseString::snprintf(buf, sizeof(buf), "Unknown node type: %d", nodeType); - ERROR_SET(fatal, ERR_INVALID_CONFIG, msg, buf); + ERROR_SET(fatal, 
NDBD_EXIT_INVALID_CONFIG, msg, buf); } } noOfNodes = nodeNo; diff --git a/ndb/src/kernel/vm/Configuration.hpp b/ndb/src/kernel/vm/Configuration.hpp index acf0e163a84..6315209ddbb 100644 --- a/ndb/src/kernel/vm/Configuration.hpp +++ b/ndb/src/kernel/vm/Configuration.hpp @@ -17,6 +17,7 @@ #ifndef Configuration_H #define Configuration_H +#include <util/BaseString.hpp> #include <mgmapi.h> #include <ndb_types.h> @@ -34,7 +35,7 @@ public: void fetch_configuration(); void setupConfiguration(); - void closeConfiguration(); + void closeConfiguration(bool end_session= true); bool lockPagesInMainMemory() const; @@ -63,16 +64,20 @@ public: bool getInitialStart() const; void setInitialStart(bool val); bool getDaemonMode() const; + bool getForegroundMode() const; const ndb_mgm_configuration_iterator * getOwnConfigIterator() const; Uint32 get_mgmd_port() const {return m_mgmd_port;}; - const char *get_mgmd_host() const {return m_mgmd_host;}; + const char *get_mgmd_host() const {return m_mgmd_host.c_str();}; + ConfigRetriever* get_config_retriever() { return m_config_retriever; }; class LogLevel * m_logLevel; private: friend class Cmvmi; friend class Qmgr; + friend int reportShutdown(class Configuration *config, int error, int restart); + ndb_mgm_configuration_iterator * getClusterConfigIterator() const; Uint32 _stopOnError; @@ -89,6 +94,8 @@ private: ConfigRetriever *m_config_retriever; + Vector<BaseString> m_mgmds; + /** * arguments to NDB process */ @@ -98,8 +105,9 @@ private: bool _initialStart; char * _connectString; Uint32 m_mgmd_port; - const char *m_mgmd_host; - bool _daemonMode; + BaseString m_mgmd_host; + bool _daemonMode; // if not, angel in foreground + bool _foregroundMode; // no angel, raw ndbd in foreground void calcSizeAlt(class ConfigValues * ); }; @@ -134,4 +142,10 @@ Configuration::getDaemonMode() const { return _daemonMode; } +inline +bool +Configuration::getForegroundMode() const { + return _foregroundMode; +} + #endif diff --git 
a/ndb/src/kernel/vm/DLFifoList.hpp b/ndb/src/kernel/vm/DLFifoList.hpp index b139ade831d..963ab007b65 100644 --- a/ndb/src/kernel/vm/DLFifoList.hpp +++ b/ndb/src/kernel/vm/DLFifoList.hpp @@ -115,6 +115,13 @@ public: */ bool hasNext(const Ptr<T> &) const; + /** + * Check if prev exists i.e. this is not first + * + * NOTE ptr must be both p & i + */ + bool hasPrev(const Ptr<T> &) const; + Uint32 noOfElements() const { Uint32 c = 0; Uint32 i = head.firstItem; @@ -357,4 +364,11 @@ DLFifoList<T>::hasNext(const Ptr<T> & p) const { return p.p->nextList != RNIL; } +template <class T> +inline +bool +DLFifoList<T>::hasPrev(const Ptr<T> & p) const { + return p.p->prevList != RNIL; +} + #endif diff --git a/ndb/src/kernel/vm/Emulator.cpp b/ndb/src/kernel/vm/Emulator.cpp index 058829e05e2..e203ec4bde8 100644 --- a/ndb/src/kernel/vm/Emulator.cpp +++ b/ndb/src/kernel/vm/Emulator.cpp @@ -35,11 +35,16 @@ #include <EventLogger.hpp> +void childExit(int code, Uint32 currentStartPhase); +void childAbort(int code, Uint32 currentStartPhase); + extern "C" { extern void (* ndb_new_handler)(); } extern EventLogger g_eventLogger; extern my_bool opt_core; +// instantiated and updated in NdbcntrMain.cpp +extern Uint32 g_currentStartPhase; /** * Declare the global variables @@ -76,7 +81,7 @@ EmulatorData::EmulatorData(){ void ndb_new_handler_impl(){ - ERROR_SET(fatal, ERR_MEMALLOC, "New handler", ""); + ERROR_SET(fatal, NDBD_EXIT_MEMALLOC, "New handler", ""); } void @@ -106,13 +111,14 @@ EmulatorData::destroy(){ delete theSimBlockList; theSimBlockList = 0; if(m_socket_server) delete m_socket_server; m_socket_server = 0; + NdbMutex_Destroy(theShutdownMutex); NdbMem_Destroy(); } void NdbShutdown(NdbShutdownType type, - NdbRestartType restartType){ - + NdbRestartType restartType) +{ if(type == NST_ErrorInsert){ type = NST_Restart; restartType = (NdbRestartType) @@ -181,12 +187,11 @@ NdbShutdown(NdbShutdownType type, g_eventLogger.info("Watchdog shutdown completed - %s", exitAbort); if (opt_core) { - 
signal(6, SIG_DFL); - abort(); + childAbort(-1,g_currentStartPhase); } else { - exit(-1); + childExit(-1,g_currentStartPhase); } } @@ -241,12 +246,11 @@ NdbShutdown(NdbShutdownType type, g_eventLogger.info("Error handler shutdown completed - %s", exitAbort); if (opt_core) { - signal(6, SIG_DFL); - abort(); + childAbort(-1,g_currentStartPhase); } else { - exit(-1); + childExit(-1,g_currentStartPhase); } } @@ -254,7 +258,7 @@ NdbShutdown(NdbShutdownType type, * This is a normal restart, depend on angel */ if(type == NST_Restart){ - exit(restartType); + childExit(restartType,g_currentStartPhase); } g_eventLogger.info("Shutdown completed - exiting"); @@ -269,10 +273,9 @@ NdbShutdown(NdbShutdownType type, if (type== NST_Watchdog){ g_eventLogger.info("Watchdog is killing system the hard way"); #if defined VM_TRACE && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) ) - signal(6, SIG_DFL); - abort(); + childAbort(-1,g_currentStartPhase); #else - exit(-1); + childExit(-1,g_currentStartPhase); #endif } diff --git a/ndb/src/kernel/vm/FastScheduler.cpp b/ndb/src/kernel/vm/FastScheduler.cpp index d0b7af27463..ad24a6795a4 100644 --- a/ndb/src/kernel/vm/FastScheduler.cpp +++ b/ndb/src/kernel/vm/FastScheduler.cpp @@ -19,7 +19,6 @@ #include "Emulator.hpp" #include "VMSignal.hpp" -#include <Error.hpp> #include <SignalLoggerManager.hpp> #include <BlockNumbers.h> @@ -395,7 +394,8 @@ void print_restart(FILE * output, Signal* signal, Uint32 aLevel); void FastScheduler::dumpSignalMemory(FILE * output) { - Signal signal; + SignalT<25> signalT; + Signal &signal= *(Signal*)&signalT; Uint32 ReadPtr[5]; Uint32 tJob; Uint32 tLastJob; @@ -444,21 +444,21 @@ void FastScheduler::dumpSignalMemory(FILE * output) void FastScheduler::prio_level_error() { - ERROR_SET(ecError, ERROR_WRONG_PRIO_LEVEL, + ERROR_SET(ecError, NDBD_EXIT_WRONG_PRIO_LEVEL, "Wrong Priority Level", "FastScheduler.C"); } void jbuf_error() { - ERROR_SET(ecError, BLOCK_ERROR_JBUFCONGESTION, + ERROR_SET(ecError, 
NDBD_EXIT_BLOCK_JBUFCONGESTION, "Job Buffer Full", "APZJobBuffer.C"); } void bnr_error() { - ERROR_SET(ecError, BLOCK_ERROR_BNR_ZERO, + ERROR_SET(ecError, NDBD_EXIT_BLOCK_BNR_ZERO, "Block Number Zero", "FastScheduler.C"); } @@ -484,16 +484,16 @@ print_restart(FILE * output, Signal* signal, Uint32 aLevel) */ void FastScheduler::reportDoJobStatistics(Uint32 tMeanLoopCount) { - Signal signal; - memset(&signal.header, 0, sizeof(signal.header)); + SignalT<2> signalT; + Signal &signal= *(Signal*)&signalT; - signal.theData[0] = EventReport::JobStatistic; - signal.theData[1] = tMeanLoopCount; - - memset(&signal.header, 0, sizeof(SignalHeader)); + memset(&signal.header, 0, sizeof(signal.header)); signal.header.theLength = 2; signal.header.theSendersSignalId = 0; - signal.header.theSendersBlockRef = numberToRef(0, 0); + signal.header.theSendersBlockRef = numberToRef(0, 0); + + signal.theData[0] = NDB_LE_JobStatistic; + signal.theData[1] = tMeanLoopCount; execute(&signal, JBA, CMVMI, GSN_EVENT_REP); } diff --git a/ndb/src/kernel/vm/KeyDescriptor.hpp b/ndb/src/kernel/vm/KeyDescriptor.hpp new file mode 100644 index 00000000000..456d64ce1d8 --- /dev/null +++ b/ndb/src/kernel/vm/KeyDescriptor.hpp @@ -0,0 +1,41 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef KEY_DESCRIPTOR_HPP +#define KEY_DESCRIPTOR_HPP + +#include <ndb_types.h> +#include <ndb_limits.h> +#include "CArray.hpp" + +struct KeyDescriptor +{ + KeyDescriptor () { noOfKeyAttr = hasCharAttr = noOfDistrKeys = 0; } + + Uint8 noOfKeyAttr; + Uint8 hasCharAttr; + Uint8 noOfDistrKeys; + Uint8 unused; + struct KeyAttr + { + Uint32 attributeDescriptor; + CHARSET_INFO* charsetInfo; + } keyAttr[MAX_ATTRIBUTES_IN_INDEX]; +}; + +extern CArray<KeyDescriptor> g_key_descriptor_pool; + +#endif diff --git a/ndb/src/kernel/vm/Makefile.am b/ndb/src/kernel/vm/Makefile.am index 0dce9285ae3..8f9bf92cb01 100644 --- a/ndb/src/kernel/vm/Makefile.am +++ b/ndb/src/kernel/vm/Makefile.am @@ -18,7 +18,7 @@ libkernel_a_SOURCES = \ SimplePropertiesSection.cpp \ SectionReader.cpp \ MetaData.cpp \ - Mutex.cpp SafeCounter.cpp + Mutex.cpp SafeCounter.cpp ndbd_malloc.cpp INCLUDES_LOC = -I$(top_srcdir)/ndb/src/mgmapi diff --git a/ndb/src/kernel/vm/MetaData.hpp b/ndb/src/kernel/vm/MetaData.hpp index 11e262664c1..1000114a421 100644 --- a/ndb/src/kernel/vm/MetaData.hpp +++ b/ndb/src/kernel/vm/MetaData.hpp @@ -86,15 +86,9 @@ public: /* Primary table of index otherwise RNIL */ Uint32 primaryTableId; - /* Type of storage (memory/disk, not used) */ - DictTabInfo::StorageType storageType; - /* Type of fragmentation (small/medium/large) */ DictTabInfo::FragmentType fragmentType; - /* Key type of fragmentation (pk/dist key/dist group) */ - DictTabInfo::FragmentKeyType fragmentKeyType; - /* Global checkpoint identity when table created */ Uint32 gciTableCreated; @@ -166,7 +160,6 @@ public: Uint32 attributeDescriptor; /* Extended attributes */ - Uint32 extType; Uint32 extPrecision; Uint32 extScale; Uint32 extLength; diff --git a/ndb/src/kernel/vm/SafeCounter.cpp 
b/ndb/src/kernel/vm/SafeCounter.cpp index b09ad08b026..542e43f9172 100644 --- a/ndb/src/kernel/vm/SafeCounter.cpp +++ b/ndb/src/kernel/vm/SafeCounter.cpp @@ -25,8 +25,8 @@ SafeCounterManager::SafeCounterManager(class SimulatedBlock & block) {} bool -SafeCounterManager::setSize(Uint32 maxNoOfActiveMutexes) { - return m_counterPool.setSize(maxNoOfActiveMutexes); +SafeCounterManager::setSize(Uint32 maxNoOfActiveMutexes, bool exit_on_error) { + return m_counterPool.setSize(maxNoOfActiveMutexes, exit_on_error); } Uint32 diff --git a/ndb/src/kernel/vm/SafeCounter.hpp b/ndb/src/kernel/vm/SafeCounter.hpp index 869a7ef671f..917a67f2508 100644 --- a/ndb/src/kernel/vm/SafeCounter.hpp +++ b/ndb/src/kernel/vm/SafeCounter.hpp @@ -63,7 +63,7 @@ class SafeCounterManager { public: SafeCounterManager(class SimulatedBlock &); - bool setSize(Uint32 maxNoOfActiveMutexes); + bool setSize(Uint32 maxNoOfActiveMutexes, bool exit_on_error = true); Uint32 getSize() const ; void execNODE_FAILREP(Signal*); diff --git a/ndb/src/kernel/vm/SimulatedBlock.cpp b/ndb/src/kernel/vm/SimulatedBlock.cpp index 9b52ac65331..b4787209d55 100644 --- a/ndb/src/kernel/vm/SimulatedBlock.cpp +++ b/ndb/src/kernel/vm/SimulatedBlock.cpp @@ -25,11 +25,12 @@ #include <TransporterRegistry.hpp> #include <SignalLoggerManager.hpp> #include <FastScheduler.hpp> -#include <NdbMem.h> +#include "ndbd_malloc.hpp" #include <signaldata/EventReport.hpp> #include <signaldata/ContinueFragmented.hpp> #include <signaldata/NodeStateSignalData.hpp> #include <signaldata/FsRef.hpp> +#include <signaldata/SignalDroppedRep.hpp> #include <DebuggerNames.hpp> #include "LongSignal.hpp" @@ -140,7 +141,6 @@ SimulatedBlock::installSimulatedBlockFunctions(){ a[GSN_UTIL_LOCK_CONF] = &SimulatedBlock::execUTIL_LOCK_CONF; a[GSN_UTIL_UNLOCK_REF] = &SimulatedBlock::execUTIL_UNLOCK_REF; a[GSN_UTIL_UNLOCK_CONF] = &SimulatedBlock::execUTIL_UNLOCK_CONF; - a[GSN_READ_CONFIG_REQ] = &SimulatedBlock::execREAD_CONFIG_REQ; a[GSN_FSOPENREF] = 
&SimulatedBlock::execFSOPENREF; a[GSN_FSCLOSEREF] = &SimulatedBlock::execFSCLOSEREF; a[GSN_FSWRITEREF] = &SimulatedBlock::execFSWRITEREF; @@ -148,6 +148,7 @@ SimulatedBlock::installSimulatedBlockFunctions(){ a[GSN_FSREMOVEREF] = &SimulatedBlock::execFSREMOVEREF; a[GSN_FSSYNCREF] = &SimulatedBlock::execFSSYNCREF; a[GSN_FSAPPENDREF] = &SimulatedBlock::execFSAPPENDREF; + a[GSN_NODE_START_REP] = &SimulatedBlock::execNODE_START_REP; } void @@ -156,8 +157,8 @@ SimulatedBlock::addRecSignalImpl(GlobalSignalNumber gsn, if(gsn > MAX_GSN || (!force && theExecArray[gsn] != 0)){ char errorMsg[255]; BaseString::snprintf(errorMsg, 255, - "Illeagal signal (%d %d)", gsn, MAX_GSN); - ERROR_SET(fatal, ERR_ERROR_PRGERR, errorMsg, errorMsg); + "GSN %d(%d))", gsn, MAX_GSN); + ERROR_SET(fatal, NDBD_EXIT_ILLEGAL_SIGNAL, errorMsg, errorMsg); } theExecArray[gsn] = f; } @@ -173,8 +174,7 @@ SimulatedBlock::signal_error(Uint32 gsn, Uint32 len, Uint32 recBlockNo, "Signal (GSN: %d, Length: %d, Rec Block No: %d)", gsn, len, recBlockNo); - ErrorReporter::handleError(ecError, - BLOCK_ERROR_BNR_ZERO, + ErrorReporter::handleError(NDBD_EXIT_BLOCK_BNR_ZERO, probData, objRef); } @@ -668,7 +668,7 @@ SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear) n, size); #endif - p = NdbMem_Allocate(size); + p = ndbd_malloc(size); if (p == NULL){ char buf1[255]; char buf2[255]; @@ -676,7 +676,7 @@ SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear) getBlockName(number()), type); BaseString::snprintf(buf2, sizeof(buf2), "Requested: %ux%u = %u bytes", (Uint32)s, (Uint32)n, (Uint32)size); - ERROR_SET(fatal, ERR_MEMALLOC, buf1, buf2); + ERROR_SET(fatal, NDBD_EXIT_MEMALLOC, buf1, buf2); } if(clear){ @@ -699,11 +699,9 @@ void SimulatedBlock::deallocRecord(void ** ptr, const char * type, size_t s, size_t n){ (void)type; - (void)s; - (void)n; if(* ptr != 0){ - NdbMem_Free(* ptr); + ndbd_free(* ptr, n*s); * ptr = 0; } } @@ -733,7 +731,7 @@ SimulatedBlock::progError(int 
line, int err_code, const char* extra) const { BaseString::snprintf(&buf[0], 100, "%s (Line: %d) 0x%.8x", aBlockName, line, magicStatus); - ErrorReporter::handleError(ecError, err_code, extra, buf); + ErrorReporter::handleError(err_code, extra, buf); } @@ -743,7 +741,7 @@ SimulatedBlock::infoEvent(const char * msg, ...) const { return; Uint32 theData[25]; - theData[0] = EventReport::InfoEvent; + theData[0] = NDB_LE_InfoEvent; char * buf = (char *)&(theData[1]); va_list ap; @@ -784,7 +782,7 @@ SimulatedBlock::warningEvent(const char * msg, ...) const { return; Uint32 theData[25]; - theData[0] = EventReport::WarningEvent; + theData[0] = NDB_LE_WarningEvent; char * buf = (char *)&(theData[1]); va_list ap; @@ -854,9 +852,12 @@ SimulatedBlock::execNDB_TAMPER(Signal * signal){ void SimulatedBlock::execSIGNAL_DROPPED_REP(Signal * signal){ - ErrorReporter::handleError(ecError, - ERR_OUT_OF_LONG_SIGNAL_MEMORY, - "Signal lost, out of long signal memory", + char msg[64]; + const SignalDroppedRep * const rep = (SignalDroppedRep *)&signal->theData[0]; + snprintf(msg, sizeof(msg), "%s GSN: %u (%u,%u)", getBlockName(number()), + rep->originalGsn, rep->originalLength,rep->originalSectionCount); + ErrorReporter::handleError(NDBD_EXIT_OUT_OF_LONG_SIGNAL_MEMORY, + msg, __FILE__, NST_ErrorHandler); } @@ -913,6 +914,20 @@ SimulatedBlock::execCONTINUE_FRAGMENTED(Signal * signal){ sendSignal(reference(), GSN_CONTINUE_FRAGMENTED, signal, 1, JBB); } +void +SimulatedBlock::execNODE_START_REP(Signal* signal) +{ + // common stuff for all blocks + + // block specific stuff by virtual method override (default empty) + exec_node_start_rep(signal); +} + +void +SimulatedBlock::exec_node_start_rep(Signal* signal) +{ +} + #ifdef VM_TRACE_TIME void SimulatedBlock::clearTimes() { @@ -1739,20 +1754,6 @@ void SimulatedBlock::execUTIL_UNLOCK_CONF(Signal* signal){ c_mutexMgr.execUTIL_UNLOCK_CONF(signal); } -void -SimulatedBlock::execREAD_CONFIG_REQ(Signal* signal){ - const ReadConfigReq * req = 
(ReadConfigReq*)signal->getDataPtr(); - - Uint32 ref = req->senderRef; - Uint32 senderData = req->senderData; - - ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend(); - conf->senderRef = reference(); - conf->senderData = senderData; - sendSignal(ref, GSN_READ_CONFIG_CONF, signal, - ReadConfigConf::SignalLength, JBB); -} - void SimulatedBlock::ignoreMutexUnlockCallback(Signal* signal, Uint32 ptrI, Uint32 retVal){ @@ -1865,3 +1866,128 @@ SimulatedBlock::init_globals_list(void ** tmp, size_t cnt){ } #endif + +#include "KeyDescriptor.hpp" + +Uint32 +SimulatedBlock::xfrm_key(Uint32 tab, const Uint32* src, + Uint32 *dst, Uint32 dstSize, + Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX]) const +{ + const KeyDescriptor * desc = g_key_descriptor_pool.getPtr(tab); + const Uint32 noOfKeyAttr = desc->noOfKeyAttr; + + Uint32 i = 0; + Uint32 srcPos = 0; + Uint32 dstPos = 0; + while (i < noOfKeyAttr) + { + const KeyDescriptor::KeyAttr& keyAttr = desc->keyAttr[i]; + Uint32 dstWords = + xfrm_attr(keyAttr.attributeDescriptor, keyAttr.charsetInfo, + src, srcPos, dst, dstPos, dstSize); + keyPartLen[i++] = dstWords; + if (unlikely(dstWords == 0)) + return 0; + } + + return dstPos; +} + +Uint32 +SimulatedBlock::xfrm_attr(Uint32 attrDesc, CHARSET_INFO* cs, + const Uint32* src, Uint32 & srcPos, + Uint32* dst, Uint32 & dstPos, Uint32 dstSize) const +{ + Uint32 srcBytes = AttributeDescriptor::getSizeInBytes(attrDesc); + Uint32 srcWords = (srcBytes + 3) / 4; + Uint32 dstWords = ~0; + uchar* dstPtr = (uchar*)&dst[dstPos]; + const uchar* srcPtr = (const uchar*)&src[srcPos]; + + if (cs == NULL) + { + jam(); + memcpy(dstPtr, srcPtr, srcWords << 2); + dstWords = srcWords; + } + else + { + jam(); + Uint32 typeId = AttributeDescriptor::getType(attrDesc); + Uint32 lb, len; + bool ok = NdbSqlUtil::get_var_length(typeId, srcPtr, srcBytes, lb, len); + if (unlikely(!ok)) + return 0; + Uint32 xmul = cs->strxfrm_multiply; + if (xmul == 0) + xmul = 1; + /* + * Varchar end-spaces are ignored in 
comparisons. To get same hash + * we blank-pad to maximum length via strnxfrm. + */ + Uint32 dstLen = xmul * (srcBytes - lb); + ndbrequire(dstLen <= ((dstSize - dstPos) << 2)); + int n = NdbSqlUtil::strnxfrm_bug7284(cs, dstPtr, dstLen, srcPtr + lb, len); + if (unlikely(n == -1)) + return 0; + while ((n & 3) != 0) + { + dstPtr[n++] = 0; + } + dstWords = (n >> 2); + } + dstPos += dstWords; + srcPos += srcWords; + return dstWords; +} + +Uint32 +SimulatedBlock::create_distr_key(Uint32 tableId, + Uint32 *data, + const Uint32 + keyPartLen[MAX_ATTRIBUTES_IN_INDEX]) const +{ + const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tableId); + const Uint32 noOfKeyAttr = desc->noOfKeyAttr; + Uint32 noOfDistrKeys = desc->noOfDistrKeys; + + Uint32 *src = data; + Uint32 *dst = data; + Uint32 i = 0; + Uint32 dstPos = 0; + + if(keyPartLen) + { + while (i < noOfKeyAttr && noOfDistrKeys) + { + Uint32 attr = desc->keyAttr[i].attributeDescriptor; + Uint32 len = keyPartLen[i]; + if(AttributeDescriptor::getDKey(attr)) + { + noOfDistrKeys--; + memmove(dst+dstPos, src, len << 2); + dstPos += len; + } + src += len; + i++; + } + } + else + { + while (i < noOfKeyAttr && noOfDistrKeys) + { + Uint32 attr = desc->keyAttr[i].attributeDescriptor; + Uint32 len = AttributeDescriptor::getSizeInWords(attr); + if(AttributeDescriptor::getDKey(attr)) + { + noOfDistrKeys--; + memmove(dst+dstPos, src, len << 2); + dstPos += len; + } + src += len; + i++; + } + } + return dstPos; +} diff --git a/ndb/src/kernel/vm/SimulatedBlock.hpp b/ndb/src/kernel/vm/SimulatedBlock.hpp index 81b4fe7413e..4a3620a00ab 100644 --- a/ndb/src/kernel/vm/SimulatedBlock.hpp +++ b/ndb/src/kernel/vm/SimulatedBlock.hpp @@ -20,11 +20,13 @@ #include <NdbTick.h> #include <kernel_types.h> #include <ndb_version.h> +#include <ndb_limits.h> #include "VMSignal.hpp" #include <RefConvert.hpp> #include <BlockNumbers.h> #include <GlobalSignalNumbers.h> + #include "pc.hpp" #include <NodeInfo.hpp> #include <NodeState.hpp> @@ -32,7 +34,6 @@ 
#include "LongSignal.hpp" #include <SignalLoggerManager.hpp> -#include <Error.hpp> #include <ErrorReporter.hpp> #include <ErrorHandlingMacros.hpp> @@ -385,6 +386,28 @@ protected: */ const NodeInfo & getNodeInfo(NodeId nodeId) const; NodeInfo & setNodeInfo(NodeId); + + /********************** + * Xfrm stuff + */ + + /** + * @return length + */ + Uint32 xfrm_key(Uint32 tab, const Uint32* src, + Uint32 *dst, Uint32 dstSize, + Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX]) const; + + Uint32 xfrm_attr(Uint32 attrDesc, CHARSET_INFO* cs, + const Uint32* src, Uint32 & srcPos, + Uint32* dst, Uint32 & dstPos, Uint32 dstSize) const; + + /** + * + */ + Uint32 create_distr_key(Uint32 tableId, + Uint32 *data, + const Uint32 keyPaLen[MAX_ATTRIBUTES_IN_INDEX])const; private: NewVARIABLE* NewVarRef; /* New Base Address Table for block */ @@ -400,6 +423,8 @@ private: void execSIGNAL_DROPPED_REP(Signal* signal); void execCONTINUE_FRAGMENTED(Signal* signal); + void execNODE_START_REP(Signal* signal); + virtual void exec_node_start_rep(Signal* signal); Uint32 c_fragmentIdCounter; ArrayPool<FragmentInfo> c_fragmentInfoPool; @@ -483,7 +508,6 @@ private: void execUTIL_UNLOCK_REF(Signal* signal); void execUTIL_UNLOCK_CONF(Signal* signal); - void execREAD_CONFIG_REQ(Signal* signal); protected: void execUPGRADE(Signal* signal); @@ -544,11 +568,11 @@ SimulatedBlock::executeFunction(GlobalSignalNumber gsn, Signal* signal){ char errorMsg[255]; if (!(gsn <= MAX_GSN)) { BaseString::snprintf(errorMsg, 255, "Illegal signal received (GSN %d too high)", gsn); - ERROR_SET(fatal, ERR_ERROR_PRGERR, errorMsg, errorMsg); + ERROR_SET(fatal, NDBD_EXIT_PRGERR, errorMsg, errorMsg); } if (!(theExecArray[gsn] != 0)) { BaseString::snprintf(errorMsg, 255, "Illegal signal received (GSN %d not added)", gsn); - ERROR_SET(fatal, ERR_ERROR_PRGERR, errorMsg, errorMsg); + ERROR_SET(fatal, NDBD_EXIT_PRGERR, errorMsg, errorMsg); } ndbrequire(false); } diff --git a/ndb/src/kernel/vm/SuperPool.cpp 
b/ndb/src/kernel/vm/SuperPool.cpp new file mode 100644 index 00000000000..65e5dd99629 --- /dev/null +++ b/ndb/src/kernel/vm/SuperPool.cpp @@ -0,0 +1,442 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <ndb_global.h> +#include "SuperPool.hpp" + +// SuperPool + +SuperPool::SuperPool(Uint32 pageSize, Uint32 pageBits) : + m_pageSize(SP_ALIGN_SIZE(pageSize, SP_ALIGN)), + m_pageBits(pageBits), + m_memRoot(0), + m_pageEnt(0), + m_typeCheck(0), + m_typeSeq(0), + m_pageList(), + m_totalSize(0), + m_initSize(0), + m_incrSize(0), + m_maxSize(0) +{ + assert(5 <= pageBits <= 30); +} + +bool +SuperPool::init() +{ + return true; +} + +SuperPool::~SuperPool() +{ +} + +SuperPool::PageEnt::PageEnt() : + m_pageType(0), + m_freeRecI(RNIL), + m_useCount(0), + m_nextPageI(RNIL), + m_prevPageI(RNIL) +{ +} + +SuperPool::PageList::PageList() : + m_headPageI(RNIL), + m_tailPageI(RNIL), + m_pageCount(0) +{ +} + +SuperPool::PageList::PageList(PtrI pageI) : + m_headPageI(pageI), + m_tailPageI(pageI), + m_pageCount(1) +{ +} + +SuperPool::RecInfo::RecInfo(Uint32 recType, Uint32 recSize) : + m_recType(recType), + m_recSize(recSize), + m_maxUseCount(0), + m_currPageI(RNIL), + m_currFreeRecI(RNIL), + m_currUseCount(0), + m_totalUseCount(0), + m_totalRecCount(0), + m_freeList(), + m_activeList(), + m_fullList() 
+{ +} + +SuperPool::PtrI +SuperPool::getPageI(void* pageP) +{ + const Uint32 pageSize = m_pageSize; + const Uint32 pageBits = m_pageBits; + const Uint32 recBits = 32 - pageBits; + void* const memRoot = m_memRoot; + assert(pageP == SP_ALIGN_PTR(pageP, memRoot, pageSize)); + my_ptrdiff_t ipL = ((Uint8*)pageP - (Uint8*)memRoot) / pageSize; + Int32 ip = (Int32)ipL; + Int32 lim = 1 << (pageBits - 1); + assert(ip == ipL && -lim <= ip && ip < lim && ip != -1); + PtrI pageI = ip << recBits; + assert(pageP == getPageP(pageI)); + return pageI; +} + +void +SuperPool::movePages(PageList& pl1, PageList& pl2) +{ + const Uint32 recBits = 32 - m_pageBits; + if (pl1.m_pageCount != 0) { + if (pl2.m_pageCount != 0) { + PtrI pageI1 = pl1.m_tailPageI; + PtrI pageI2 = pl2.m_headPageI; + PageEnt& pe1 = getPageEnt(pageI1); + PageEnt& pe2 = getPageEnt(pageI2); + pe1.m_nextPageI = pageI2; + pe2.m_prevPageI = pageI1; + pl1.m_pageCount += pl2.m_pageCount; + } + } else { + pl1 = pl2; + } + pl2.m_headPageI = pl2.m_tailPageI = RNIL; + pl2.m_pageCount = 0; +} + +void +SuperPool::addHeadPage(PageList& pl, PtrI pageI) +{ + PageList pl2(pageI); + movePages(pl2, pl); + pl = pl2; +} + +void +SuperPool::addTailPage(PageList& pl, PtrI pageI) +{ + PageList pl2(pageI); + movePages(pl, pl2); +} + +void +SuperPool::removePage(PageList& pl, PtrI pageI) +{ + PageEnt& pe = getPageEnt(pageI); + PtrI pageI1 = pe.m_prevPageI; + PtrI pageI2 = pe.m_nextPageI; + if (pageI1 != RNIL) { + PageEnt& pe1 = getPageEnt(pageI1); + pe1.m_nextPageI = pageI2; + if (pageI2 != RNIL) { + PageEnt& pe2 = getPageEnt(pageI2); + pe2.m_prevPageI = pageI1; + } else { + pl.m_tailPageI = pageI1; + } + } else { + if (pageI2 != RNIL) { + PageEnt& pe2 = getPageEnt(pageI2); + pe2.m_prevPageI = pageI1; + pl.m_headPageI = pageI2; + } else { + pl.m_headPageI = pl.m_tailPageI = RNIL; + } + } + pe.m_prevPageI = pe.m_nextPageI = RNIL; + assert(pl.m_pageCount != 0); + pl.m_pageCount--; +} + +void +SuperPool::setCurrPage(RecInfo& ri, PtrI newPageI) +{ 
+ PtrI oldPageI = ri.m_currPageI; + if (oldPageI != RNIL) { + // copy from cache + PageEnt& pe = getPageEnt(oldPageI); + pe.m_freeRecI = ri.m_currFreeRecI; + pe.m_useCount = ri.m_currUseCount; + // add to right list according to "pp2" policy + if (pe.m_useCount == 0) { + pe.m_pageType = 0; + addHeadPage(m_pageList, oldPageI); + ri.m_totalRecCount -= ri.m_maxUseCount; + } else if (pe.m_useCount < ri.m_maxUseCount) { + addHeadPage(ri.m_activeList, oldPageI); + } else { + addHeadPage(ri.m_fullList, oldPageI); + } + } + if (newPageI != RNIL) { + PageEnt& pe = getPageEnt(newPageI); + // copy to cache + ri.m_currPageI = newPageI; + ri.m_currFreeRecI = pe.m_freeRecI; + ri.m_currUseCount = pe.m_useCount; + // remove from right list + if (pe.m_useCount == 0) { + removePage(ri.m_freeList, newPageI); + } else if (pe.m_useCount < ri.m_maxUseCount) { + removePage(ri.m_activeList, newPageI); + } else { + removePage(ri.m_fullList, newPageI); + } + } else { + ri.m_currPageI = RNIL; + ri.m_currFreeRecI = RNIL; + ri.m_currUseCount = 0; + } +} + +bool +SuperPool::getAvailPage(RecInfo& ri) +{ + PtrI pageI; + if ((pageI = ri.m_activeList.m_headPageI) != RNIL || + (pageI = ri.m_freeList.m_headPageI) != RNIL || + (pageI = getFreePage(ri)) != RNIL) { + setCurrPage(ri, pageI); + return true; + } + return false; +} + +SuperPool::PtrI +SuperPool::getFreePage(RecInfo& ri) +{ + PtrI pageI; + if (m_pageList.m_pageCount != 0) { + pageI = m_pageList.m_headPageI; + removePage(m_pageList, pageI); + } else { + pageI = getNewPage(); + if (pageI == RNIL) + return RNIL; + } + void* pageP = getPageP(pageI); + // set up free record list + Uint32 maxUseCount = ri.m_maxUseCount; + Uint32 recSize = ri.m_recSize; + void* recP = (Uint8*)pageP; + Uint32 irNext = 1; + while (irNext < maxUseCount) { + *(Uint32*)recP = pageI | irNext; + recP = (Uint8*)recP + recSize; + irNext++; + } + *(Uint32*)recP = RNIL; + // add to total record count + ri.m_totalRecCount += maxUseCount; + // set up new page entry + PageEnt& 
pe = getPageEnt(pageI); + new (&pe) PageEnt(); + pe.m_pageType = ri.m_recType; + pe.m_freeRecI = pageI | 0; + pe.m_useCount = 0; + // set type check bits + setCheckBits(pageI, ri.m_recType); + // add to record pool free list + addHeadPage(ri.m_freeList, pageI); + return pageI; +} + +void +SuperPool::setSizes(size_t initSize, size_t incrSize, size_t maxSize) +{ + const Uint32 pageSize = m_pageSize; + m_initSize = SP_ALIGN_SIZE(initSize, pageSize); + m_incrSize = SP_ALIGN_SIZE(incrSize, pageSize); + m_maxSize = SP_ALIGN_SIZE(maxSize, pageSize); +} + +void +SuperPool::verify(RecInfo& ri) +{ + PageList* plList[3] = { &ri.m_freeList, &ri.m_activeList, &ri.m_fullList }; + for (int i = 0; i < 3; i++) { + PageList& pl = *plList[i]; + unsigned count = 0; + PtrI pageI = pl.m_headPageI; + while (pageI != RNIL) { + PageEnt& pe = getPageEnt(pageI); + PtrI pageI1 = pe.m_prevPageI; + PtrI pageI2 = pe.m_nextPageI; + if (count == 0) { + assert(pageI1 == RNIL); + } else { + assert(pageI1 != RNIL); + PageEnt& pe1 = getPageEnt(pageI1); + assert(pe1.m_nextPageI == pageI); + if (pageI2 != RNIL) { + PageEnt& pe2 = getPageEnt(pageI2); + assert(pe2.m_prevPageI == pageI); + } + } + pageI = pageI2; + count++; + } + assert(pl.m_pageCount == count); + } +} + +// HeapPool + +HeapPool::HeapPool(Uint32 pageSize, Uint32 pageBits) : + SuperPool(pageSize, pageBits), + m_areaHead(), + m_currArea(&m_areaHead), + m_lastArea(&m_areaHead), + m_mallocPart(4) +{ +} + +bool +HeapPool::init() +{ + const Uint32 pageBits = m_pageBits; + if (! 
SuperPool::init()) + return false;; + // allocate page entry array + Uint32 peBytes = (1 << pageBits) * sizeof(PageEnt); + m_pageEnt = static_cast<PageEnt*>(malloc(peBytes)); + if (m_pageEnt == 0) + return false; + memset(m_pageEnt, 0, peBytes); + // allocate type check array + Uint32 tcWords = 1 << (pageBits - (5 - SP_CHECK_LOG2)); + m_typeCheck = static_cast<Uint32*>(malloc(tcWords << 2)); + if (m_typeCheck == 0) + return false; + memset(m_typeCheck, 0, tcWords << 2); + // allocate initial data + assert(m_totalSize == 0); + if (! allocMoreData(m_initSize)) + return false; + return true; +} + +HeapPool::~HeapPool() +{ + free(m_pageEnt); + free(m_typeCheck); + Area* ap; + while ((ap = m_areaHead.m_nextArea) != 0) { + m_areaHead.m_nextArea = ap->m_nextArea; + free(ap->m_memory); + free(ap); + } +} + +HeapPool::Area::Area() : + m_nextArea(0), + m_firstPageI(RNIL), + m_currPage(0), + m_numPages(0), + m_memory(0) +{ +} + +SuperPool::PtrI +HeapPool::getNewPage() +{ + const Uint32 pageSize = m_pageSize; + const Uint32 pageBits = m_pageBits; + const Uint32 recBits= 32 - pageBits; + Area* ap = m_currArea; + if (ap->m_currPage == ap->m_numPages) { + // area is used up + if (ap->m_nextArea == 0) { + // todo dynamic increase + assert(m_incrSize == 0); + return RNIL; + } + ap = m_currArea = ap->m_nextArea; + } + assert(ap->m_currPage < ap->m_numPages); + PtrI pageI = ap->m_firstPageI; + Int32 ip = (Int32)pageI >> recBits; + ip += ap->m_currPage; + pageI = ip << recBits; + ap->m_currPage++; + return pageI; +} + +bool +HeapPool::allocMoreData(size_t size) +{ + const Uint32 pageSize = m_pageSize; + const Uint32 pageBits = m_pageBits; + const Uint32 recBits = 32 - pageBits; + const Uint32 incrSize = m_incrSize; + const Uint32 incrPages = incrSize / pageSize; + const Uint32 mallocPart = m_mallocPart; + size = SP_ALIGN_SIZE(size, pageSize); + if (incrSize != 0) + size = SP_ALIGN_SIZE(size, incrSize); + Uint32 needPages = size / pageSize; + while (needPages != 0) { + Uint32 wantPages 
= needPages; + if (incrPages != 0 && wantPages > incrPages) + wantPages = incrPages; + Uint32 tryPages = 0; + void* p1 = 0; + for (Uint32 i = mallocPart; i > 0 && p1 == 0; i--) { + // one page is usually wasted due to alignment to memory root + tryPages = ((wantPages + 1) * i) / mallocPart; + if (tryPages < 2) + break; + p1 = malloc(pageSize * tryPages); + } + if (p1 == 0) + return false; + if (m_memRoot == 0) { + // set memory root at first "big" alloc + // assume malloc header makes later ip = -1 impossible + m_memRoot = p1; + } + void* p2 = SP_ALIGN_PTR(p1, m_memRoot, pageSize); + Uint32 numPages = tryPages - (p1 != p2); + my_ptrdiff_t ipL = ((Uint8*)p2 - (Uint8*)m_memRoot) / pageSize; + Int32 ip = (Int32)ipL; + Int32 lim = 1 << (pageBits - 1); + if (! (ip == ipL && -lim <= ip && ip + numPages < lim)) { + free(p1); + return false; + } + assert(ip != -1); + PtrI pageI = ip << recBits; + needPages = (needPages >= numPages ? needPages - numPages : 0); + m_totalSize += numPages * pageSize; + // allocate new area + Area* ap = static_cast<Area*>(malloc(sizeof(Area))); + if (ap == 0) { + free(p1); + return false; + } + new (ap) Area(); + ap->m_firstPageI = pageI; + ap->m_numPages = numPages; + ap->m_memory = p1; + m_lastArea->m_nextArea = ap; + m_lastArea = ap; + } + return true; +} diff --git a/ndb/src/kernel/vm/SuperPool.hpp b/ndb/src/kernel/vm/SuperPool.hpp new file mode 100644 index 00000000000..157c75aa0d5 --- /dev/null +++ b/ndb/src/kernel/vm/SuperPool.hpp @@ -0,0 +1,561 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef SUPER_POOL_HPP +#define SUPER_POOL_HPP + +#include <ndb_global.h> + +#include <pc.hpp> +#include <ErrorReporter.hpp> + +#define NDB_SP_VERIFY_LEVEL 1 + +/* + * SuperPool - super pool for record pools (abstract class) + * + * Documents SuperPool and RecordPool<T>. + * + * GENERAL + * + * A "super pool" is a shared pool of pages of fixed size. A "record + * pool" is a pool of records of fixed size. One super pool instance is + * used by any number of record pools to allocate their memory. + * A special case is a "page pool" where a record is a simple page, + * possibly smaller than super pool page. + * + * A record pool allocates memory in pages. Thus each used page is + * associated with one record pool and one record type. The records on + * a page form an array starting at start of page. Thus each record has + * an index within the page. Any last partial record which does not fit + * on the page is disregarded. + * + * I-VALUE + * + * The old "i-p" principle is kept. A reference to a super pool page or + * record is stored as an "i-value" from which the record pointer "p" is + * computed. In super pool the i-value is a Uint32 with two parts: + * + * - "ip" index of page within super pool (high pageBits) + * - "ir" index of record within page (low recBits) + * + * The translation between "ip" and page address is described in next + * section. Once page address is known, the record address is found + * from "ir" in the obvious way. + * + * The main advantage with i-value is that it can be verified. The + * level of verification depends on compile type (release, debug). 
+ * + * - "v0" minimal sanity check + * - "v1" check record type matches page type, see below + * - "v2" check record is in use (not yet implemented) + * + * Another advantage of a 32-bit i-value is that it extends the space of + * 32-bit addressable records on a 64-bit platform. + * + * RNIL is 0xffffff00 and indicates NULL i-value. To avoid hitting RNIL + * it is required that pageBits <= 30 and that the maximum value of the + * range (2^pageBits-1) is not used. + * + * MEMORY ROOT + * + * This super pool requires a "memory root" i.e. a memory address such + * that the index of a page "ip" satisfies + * + * page address = memory root + (signed)ip * page size + * + * This is possible on most platforms, provided that the memory root and + * all pages are either on the heap or on the stack, in order to keep + * the size of "ip" reasonably small. + * + * The cast (signed)ip is done as integer of pageBits bits. "ip" has + * same sign bit as i-value "i" so (signed)ip = (Int32)i >> recBits. + * The RNIL restriction can be expressed as (signed)ip != -1. + * + * PAGE ENTRIES + * + * Each super pool page has a "page entry". It contains: + * + * - page type + * - i-value of first free record on page + * - page use count, to see if page can be freed + * - pointers (as i-values) to next and previous page in list + * + * Page entry cannot be stored on the page itself since this prevents + * aligning pages to OS block size and the use of BATs (don't ask) for + * page pools in NDB. For now the implementation provides an array of + * page entries with place for all (2^pageBits) entries. + * + * PAGE TYPE + * + * Page type is (in principle) unique to the record pool using the super + * pool. It is assigned in record pool constructor. Page type zero + * means that the page is free i.e. not allocated to a record pool. + * + * Each "i-p" conversion checks ("v1") that the record belongs to same + * pool as the page. This check is much more common than page or record + * allocation. 
To make it cache effective, there is a separate array of + * reduced "type bits" (computed from real type). + * + * FREE LISTS + * + * A record is either used or on the free list of the record pool. + * A page has a use count i.e. number of used records. When use count + * drops to zero the page can be returned to the super pool. This is + * not necessarily done at once, or ever. + * + * To make freeing pages feasible, the record pool free list has two + * levels. There are available pages (some free) and a singly linked + * free list within the page. A page allocated to record pool is on one + * of 4 lists: + * + * - free page list (all free, available) + * - active page list (some free, some used, available) + * - full page list (none free) + * - current page (list of 1), see below + * + * Some usage types (temporary pools) may never free records. They pay + * a small penalty for the extra overhead. + * + * RECORD POOL + * + * A pool of records which allocates its memory from a super pool + * instance specified in the constructor. There are 3 basic operations: + * + * - getPtr - translate i-value to pointer-to-record p + * - seize - allocate record + * - release - free record + * + * CURRENT PAGE + * + * getPtr is a fast computation which does not touch the page. For + * seize and release there is an optimization: + * + * Define "current page" as page of latest seize or release. Its page + * entry is cached under record pool instance. The page is removed from + * its normal list. Seize and release on current page are fast and + * avoid touching the page. The current page is used until + * + * - seize and current page is full + * - release and the page is not current page + * + * Then the real page entry is updated and the page is added to the + * appropriate list, and a new page is made current. + * + * PAGE POLICY + * + * Allocating new page to record pool is expensive. Therefore record + * pool should not always return empty pages to super pool. 
There are + * two trivial policies, each with problems: + * + * - "pp1" never return empty page to super pool + * - "pp2" always return empty page to super pool + * + * This implementation uses "pp2" for now. A real policy is implemented + * in next version. + * + * OPEN ISSUES AND LIMITATIONS + * + * - smarter (virtual) placement of check bits & page entries + * - should getPtr etc be inlined? (too much code) + * - real page policy + * - other implementations (only HeapPool is done) + * - super pool list of all record pools, for statistics etc + * - access by multiple threads is not supported + */ + +// align size +#define SP_ALIGN_SIZE(sz, al) \ + (((sz) + (al) - 1) & ~((al) - 1)) + +// align pointer relative to base +#define SP_ALIGN_PTR(p, base, al) \ + (void*)((Uint8*)(base) + SP_ALIGN_SIZE((Uint8*)(p) - (Uint8*)(base), (al))) + +class SuperPool { +public: + // Type of i-value, used to reference both pages and records. Page + // index "ip" occupies the high bits. The i-value of a page is same + // as i-value of record 0 on the page. + typedef Uint32 PtrI; + + // Size and address alignment given as number of bytes (power of 2). + STATIC_CONST( SP_ALIGN = 8 ); + + // Page entry. Current|y allocated as array of (2^pageBits). + struct PageEnt { + PageEnt(); + Uint32 m_pageType; + Uint32 m_freeRecI; + Uint32 m_useCount; + PtrI m_nextPageI; + PtrI m_prevPageI; + }; + + // Number of bits for cache effective type check given as log of 2. + // Example: 2 means 4 bits and uses 32k for 2g of 32k pages. + STATIC_CONST( SP_CHECK_LOG2 = 2 ); + + // Doubly-linked list of pages. There is one free list in super pool + // and free, active, full list in each record pool. + struct PageList { + PageList(); + PageList(PtrI pageI); + PtrI m_headPageI; + PtrI m_tailPageI; + Uint32 m_pageCount; + }; + + // Record pool information. Each record pool instance contains one. 
+ struct RecInfo { + RecInfo(Uint32 recType, Uint32 recSize); + const Uint32 m_recType; + const Uint32 m_recSize; + Uint32 m_maxUseCount; // could be computed + Uint32 m_currPageI; // current page + Uint32 m_currFreeRecI; + Uint32 m_currUseCount; + Uint32 m_totalUseCount; // total per pool + Uint32 m_totalRecCount; + PageList m_freeList; + PageList m_activeList; + PageList m_fullList; + }; + + // Constructor. Gives page size in bytes (excluding page header) and + // number of bits to use for page index "ip" in i-value. + SuperPool(Uint32 pageSize, Uint32 pageBits); + + // Initialize. Must be called after setting sizes or other parameters + // and before the pool is used. + virtual bool init(); + + // Destructor. + virtual ~SuperPool() = 0; + + // Translate i-value to page entry. + PageEnt& getPageEnt(PtrI pageI); + + // Translate i-value to page address. + void* getPageP(PtrI pageI); + + // Translate page address to i-value (unused). + PtrI getPageI(void* pageP); + + // Given type, return non-zero reduced type check bits. + Uint32 makeCheckBits(Uint32 type); + + // Get type check bits from type check array. + Uint32 getCheckBits(PtrI pageI); + + // Set type check bits in type check array. + void setCheckBits(PtrI pageI, Uint32 type); + + // Translate i-value to record address. + void* getRecP(PtrI recI, RecInfo& ri); + + // Move all pages from second list to end of first list. + void movePages(PageList& pl1, PageList& pl2); + + // Add page to beginning of page list. + void addHeadPage(PageList& pl, PtrI pageI); + + // Add page to end of page list. + void addTailPage(PageList& pl, PtrI pageI); + + // Remove any page from page list. + void removePage(PageList& pl, PtrI pageI); + + // Set current page. Previous current page is updated and added to + // appropriate list. + void setCurrPage(RecInfo& ri, PtrI pageI); + + // Get page with some free records and make it current. Takes head of + // active or free list, or else gets free page from super pool. 
+ bool getAvailPage(RecInfo& ri); + + // Get free page from super pool and add it to record pool free list. + // This is an expensive subroutine of getAvailPage(). + PtrI getFreePage(RecInfo& ri); + + // Get new free page from the implementation. + virtual PtrI getNewPage() = 0; + + // Set 3 size parameters, rounded to page size. If called before + // init() then init() allocates the initial size. + void setSizes(size_t initSize = 0, size_t incrSize = 0, size_t maxSize = 0); + + const Uint32 m_pageSize; + const Uint32 m_pageBits; + // implementation must set up these pointers + void* m_memRoot; + PageEnt* m_pageEnt; + Uint32* m_typeCheck; + Uint32 m_typeSeq; + PageList m_pageList; + size_t m_totalSize; + size_t m_initSize; + size_t m_incrSize; + size_t m_maxSize; + + // Debugging. + void verify(RecInfo& ri); +}; + +inline SuperPool::PageEnt& +SuperPool::getPageEnt(PtrI pageI) +{ + Uint32 ip = pageI >> (32 - m_pageBits); + return m_pageEnt[ip]; +} + +inline void* +SuperPool::getPageP(PtrI ptrI) +{ + Int32 ip = (Int32)ptrI >> (32 - m_pageBits); + my_ptrdiff_t sz = m_pageSize; + void* pageP = (Uint8*)m_memRoot + ip * sz; + return pageP; +} + +inline Uint32 +SuperPool::makeCheckBits(Uint32 type) +{ + Uint32 shift = 1 << SP_CHECK_LOG2; + Uint32 mask = (1 << shift) - 1; + return 1 + type % mask; +} + +inline Uint32 +SuperPool::getCheckBits(PtrI pageI) +{ + Uint32 ip = pageI >> (32 - m_pageBits); + Uint32 xp = ip >> (5 - SP_CHECK_LOG2); + Uint32 yp = ip & (1 << (5 - SP_CHECK_LOG2)) - 1; + Uint32& w = m_typeCheck[xp]; + Uint32 shift = 1 << SP_CHECK_LOG2; + Uint32 mask = (1 << shift) - 1; + // get + Uint32 bits = (w >> yp * shift) & mask; + return bits; +} + +inline void +SuperPool::setCheckBits(PtrI pageI, Uint32 type) +{ + Uint32 ip = pageI >> (32 - m_pageBits); + Uint32 xp = ip >> (5 - SP_CHECK_LOG2); + Uint32 yp = ip & (1 << (5 - SP_CHECK_LOG2)) - 1; + Uint32& w = m_typeCheck[xp]; + Uint32 shift = 1 << SP_CHECK_LOG2; + Uint32 mask = (1 << shift) - 1; + // set + Uint32 
bits = makeCheckBits(type); + w &= ~(mask << yp * shift); + w |= (bits << yp * shift); +} + +inline void* +SuperPool::getRecP(PtrI ptrI, RecInfo& ri) +{ + const Uint32 recMask = (1 << (32 - m_pageBits)) - 1; + PtrI pageI = ptrI & ~recMask; +#if NDB_SP_VERIFY_LEVEL >= 1 + Uint32 bits1 = getCheckBits(pageI); + Uint32 bits2 = makeCheckBits(ri.m_recType); + assert(bits1 == bits2); +#endif + void* pageP = getPageP(pageI); + Uint32 ir = ptrI & recMask; + void* recP = (Uint8*)pageP + ir * ri.m_recSize; + return recP; +} + +/* + * HeapPool - SuperPool on heap (concrete class) + * + * A super pool based on malloc with memory root on the heap. This + * pool type has 2 realistic uses: + * + * - a small pool with only initial malloc and pageBits set to match + * - the big pool from which all heap allocations are done + * + * A "smart" malloc may break "ip" limit by using different VM areas for + * different sized requests. For this reason malloc is done in units of + * increment size if possible. Memory root is set to start of first + * malloc. + */ + +class HeapPool : public SuperPool { +public: + // Describes malloc area. The areas are kept in singly linked list. + // There is a list head and pointers to current and last area. + struct Area { + Area(); + Area* m_nextArea; + PtrI m_firstPageI; + Uint32 m_currPage; + Uint32 m_numPages; + void* m_memory; + }; + + // Constructor. + HeapPool(Uint32 pageSize, Uint32 pageBits); + + // Initialize. + virtual bool init(); + + // Destructor. + virtual ~HeapPool(); + + // Use malloc to allocate more. + bool allocMoreData(size_t size); + + // Get new page from current area. + virtual PtrI getNewPage(); + + // List of malloc areas. + Area m_areaHead; + Area* m_currArea; + Area* m_lastArea; + + // Fraction of malloc size to try if cannot get all in one. + Uint32 m_mallocPart; +}; + +/* + * RecordPool - record pool using one super pool instance (template) + * + * Documented under SuperPool. Satisfies ArrayPool interface. 
+ */ + +template <class T> +class RecordPool { +public: + // Constructor. + RecordPool(SuperPool& superPool); + + // Destructor. + ~RecordPool(); + + // Update pointer ptr.p according to i-value ptr.i. + void getPtr(Ptr<T>& ptr); + + // Allocate record from the pool. + bool seize(Ptr<T>& ptr); + + // Return record to the pool. + void release(Ptr<T>& ptr); + + // todo variants of basic methods + + // Return all pages to super pool. The force flag is required if + // there are any used records. + void free(bool force); + + SuperPool& m_superPool; + SuperPool::RecInfo m_recInfo; +}; + +template <class T> +inline +RecordPool<T>::RecordPool(SuperPool& superPool) : + m_superPool(superPool), + m_recInfo(1 + superPool.m_typeSeq++, sizeof(T)) +{ + SuperPool::RecInfo& ri = m_recInfo; + assert(sizeof(T) == SP_ALIGN_SIZE(sizeof(T), sizeof(Uint32))); + Uint32 maxUseCount = superPool.m_pageSize / sizeof(T); + Uint32 sizeLimit = 1 << (32 - superPool.m_pageBits); + if (maxUseCount >= sizeLimit) + maxUseCount = sizeLimit; + ri.m_maxUseCount = maxUseCount; +} + +template <class T> +inline +RecordPool<T>::~RecordPool() +{ + free(true); +} + +template <class T> +inline void +RecordPool<T>::getPtr(Ptr<T>& ptr) +{ + void* recP = m_superPool.getRecP(ptr.i, m_recInfo); + ptr.p = static_cast<T*>(recP); +} + +template <class T> +inline bool +RecordPool<T>::seize(Ptr<T>& ptr) +{ + SuperPool& sp = m_superPool; + SuperPool::RecInfo& ri = m_recInfo; + if (ri.m_currFreeRecI != RNIL || sp.getAvailPage(ri)) { + SuperPool::PtrI recI = ri.m_currFreeRecI; + void* recP = sp.getRecP(recI, ri); + ri.m_currFreeRecI = *(Uint32*)recP; + Uint32 useCount = ri.m_currUseCount; + assert(useCount < ri.m_maxUseCount); + ri.m_currUseCount = useCount + 1; + ri.m_totalUseCount++; + ptr.i = recI; + ptr.p = static_cast<T*>(recP); + return true; + } + return false; +} + +template <class T> +inline void +RecordPool<T>::release(Ptr<T>& ptr) +{ + SuperPool& sp = m_superPool; + SuperPool::RecInfo& ri = m_recInfo; + const 
Uint32 recMask = (1 << (32 - sp.m_pageBits)) - 1; + SuperPool::PtrI recI = ptr.i; + SuperPool::PtrI pageI = recI & ~recMask; + if (pageI != ri.m_currPageI) { + sp.setCurrPage(ri, pageI); + } + void* recP = sp.getRecP(recI, ri); + *(Uint32*)recP = ri.m_currFreeRecI; + ri.m_currFreeRecI = recI; + Uint32 useCount = ri.m_currUseCount; + assert(useCount != 0); + ri.m_currUseCount = useCount - 1; + ri.m_totalUseCount--; + ptr.i = RNIL; + ptr.p = 0; +} + +template <class T> +inline void +RecordPool<T>::free(bool force) +{ + SuperPool& sp = m_superPool; + SuperPool::RecInfo& ri = m_recInfo; + sp.setCurrPage(ri, RNIL); + assert(force || ri.m_totalUseCount == 0); + sp.movePages(sp.m_pageList, ri.m_freeList); + sp.movePages(sp.m_pageList, ri.m_activeList); + sp.movePages(sp.m_pageList, ri.m_fullList); + ri.m_totalRecCount = 0; +} + +#endif diff --git a/ndb/src/kernel/vm/TimeQueue.cpp b/ndb/src/kernel/vm/TimeQueue.cpp index 56988c2e3da..0b620c75d52 100644 --- a/ndb/src/kernel/vm/TimeQueue.cpp +++ b/ndb/src/kernel/vm/TimeQueue.cpp @@ -19,7 +19,6 @@ #include <GlobalData.hpp> #include <FastScheduler.hpp> #include <VMSignal.hpp> -#include <Error.hpp> static const int MAX_TIME_QUEUE_VALUE = 32000; @@ -70,7 +69,7 @@ TimeQueue::insert(Signal* signal, BlockNumber bnr, if (regShortIndex == 0){ theShortQueue[0].copy_struct = newEntry.copy_struct; } else if (regShortIndex >= MAX_NO_OF_SHORT_TQ - 1) { - ERROR_SET(ecError, ERROR_TIME_QUEUE_SHORT, + ERROR_SET(ecError, NDBD_EXIT_TIME_QUEUE_SHORT, "Too many in Short Time Queue", "TimeQueue.C" ); } else { for (i = 0; i < regShortIndex; i++) { @@ -99,7 +98,7 @@ TimeQueue::insert(Signal* signal, BlockNumber bnr, if (regLongIndex == 0) { theLongQueue[0].copy_struct = newEntry.copy_struct; } else if (regLongIndex >= MAX_NO_OF_LONG_TQ - 1) { - ERROR_SET(ecError, ERROR_TIME_QUEUE_LONG, + ERROR_SET(ecError, NDBD_EXIT_TIME_QUEUE_LONG, "Too many in Long Time Queue", "TimeQueue.C" ); } else { for (i = 0; i < regLongIndex; i++) { @@ -124,7 +123,7 @@ 
TimeQueue::insert(Signal* signal, BlockNumber bnr, } globalData.theLongTQIndex = regLongIndex + 1; } else { - ERROR_SET(ecError, ERROR_TIME_QUEUE_DELAY, + ERROR_SET(ecError, NDBD_EXIT_TIME_QUEUE_DELAY, "Too long delay for Time Queue", "TimeQueue.C" ); } } @@ -194,7 +193,7 @@ TimeQueue::getIndex() Uint32 retValue = globalData.theFirstFreeTQIndex; globalData.theFirstFreeTQIndex = (Uint32)theFreeIndex[retValue]; if (retValue >= MAX_NO_OF_TQ) - ERROR_SET(fatal, ERROR_TIME_QUEUE_INDEX, + ERROR_SET(fatal, NDBD_EXIT_TIME_QUEUE_INDEX, "Index out of range", "TimeQueue.C" ); return retValue; } diff --git a/ndb/src/kernel/vm/TransporterCallback.cpp b/ndb/src/kernel/vm/TransporterCallback.cpp index ba929b7ea7a..f315918b871 100644 --- a/ndb/src/kernel/vm/TransporterCallback.cpp +++ b/ndb/src/kernel/vm/TransporterCallback.cpp @@ -33,11 +33,34 @@ #include <NdbOut.hpp> #include "DataBuffer.hpp" + /** * The instance */ SectionSegmentPool g_sectionSegmentPool; +struct ConnectionError +{ + enum TransporterError err; + const char *text; +}; + +static const ConnectionError connectionError[] = +{ + { TE_NO_ERROR, "No error"}, + { TE_SHM_UNABLE_TO_CREATE_SEGMENT, "Unable to create shared memory segment"}, + { (enum TransporterError) -1, "No connection error message available (please report a bug)"} +}; + +const char *lookupConnectionError(Uint32 err) +{ + int i= 0; + while ((Uint32)connectionError[i].err != err && + (Uint32)connectionError[i].err != -1) + i++; + return connectionError[i].text; +} + bool import(Ptr<SectionSegment> & first, const Uint32 * src, Uint32 len){ /** @@ -305,41 +328,61 @@ checkJobBuffer() { } void -reportError(void * callbackObj, NodeId nodeId, TransporterError errorCode){ +reportError(void * callbackObj, NodeId nodeId, + TransporterError errorCode, const char *info) +{ #ifdef DEBUG_TRANSPORTER - char buf[255]; - sprintf(buf, "reportError (%d, 0x%x)", nodeId, errorCode); - ndbout << buf << endl; + ndbout_c("reportError (%d, 0x%x) %s", nodeId, errorCode, info ? 
info : "") #endif - if(errorCode == TE_SIGNAL_LOST_SEND_BUFFER_FULL){ - ErrorReporter::handleError(ecError, - ERR_PROGRAMERROR, - "Signal lost, send buffer full", - __FILE__, - NST_ErrorHandler); + DBUG_ENTER("reportError"); + DBUG_PRINT("info",("nodeId %d errorCode: 0x%x info: %s", + nodeId, errorCode, info)); + + switch (errorCode) + { + case TE_SIGNAL_LOST_SEND_BUFFER_FULL: + { + char msg[64]; + snprintf(msg, sizeof(msg), "Remote note id %d.%s%s", nodeId, + info ? " " : "", info ? info : ""); + ErrorReporter::handleError(NDBD_EXIT_SIGNAL_LOST_SEND_BUFFER_FULL, + msg, __FILE__, NST_ErrorHandler); } - - if(errorCode == TE_SIGNAL_LOST){ - ErrorReporter::handleError(ecError, - ERR_PROGRAMERROR, - "Signal lost (unknown reason)", - __FILE__, - NST_ErrorHandler); + case TE_SIGNAL_LOST: + { + char msg[64]; + snprintf(msg, sizeof(msg), "Remote node id %d,%s%s", nodeId, + info ? " " : "", info ? info : ""); + ErrorReporter::handleError(NDBD_EXIT_SIGNAL_LOST, + msg, __FILE__, NST_ErrorHandler); } - - if(errorCode & 0x8000){ + case TE_SHM_IPC_PERMANENT: + { + char msg[128]; + snprintf(msg, sizeof(msg), + "Remote node id %d.%s%s", + nodeId, info ? " " : "", info ? 
info : ""); + ErrorReporter::handleError(NDBD_EXIT_CONNECTION_SETUP_FAILED, + msg, __FILE__, NST_ErrorHandler); + } + default: + break; + } + + if(errorCode & TE_DO_DISCONNECT){ reportDisconnect(callbackObj, nodeId, errorCode); } - Signal signal; + SignalT<3> signalT; + Signal &signal= *(Signal*)&signalT; memset(&signal.header, 0, sizeof(signal.header)); - if(errorCode & 0x8000) - signal.theData[0] = EventReport::TransporterError; + if(errorCode & TE_DO_DISCONNECT) + signal.theData[0] = NDB_LE_TransporterError; else - signal.theData[0] = EventReport::TransporterWarning; + signal.theData[0] = NDB_LE_TransporterWarning; signal.theData[1] = nodeId; signal.theData[2] = errorCode; @@ -348,6 +391,8 @@ reportError(void * callbackObj, NodeId nodeId, TransporterError errorCode){ signal.header.theSendersSignalId = 0; signal.header.theSendersBlockRef = numberToRef(0, globalData.ownId); globalScheduler.execute(&signal, JBA, CMVMI, GSN_EVENT_REP); + + DBUG_VOID_RETURN; } /** @@ -357,13 +402,14 @@ void reportSendLen(void * callbackObj, NodeId nodeId, Uint32 count, Uint64 bytes){ - Signal signal; + SignalT<3> signalT; + Signal &signal= *(Signal*)&signalT; memset(&signal.header, 0, sizeof(signal.header)); signal.header.theLength = 3; signal.header.theSendersSignalId = 0; signal.header.theSendersBlockRef = numberToRef(0, globalData.ownId); - signal.theData[0] = EventReport::SendBytesStatistic; + signal.theData[0] = NDB_LE_SendBytesStatistic; signal.theData[1] = nodeId; signal.theData[2] = (bytes/count); globalScheduler.execute(&signal, JBA, CMVMI, GSN_EVENT_REP); @@ -376,13 +422,14 @@ void reportReceiveLen(void * callbackObj, NodeId nodeId, Uint32 count, Uint64 bytes){ - Signal signal; + SignalT<3> signalT; + Signal &signal= *(Signal*)&signalT; memset(&signal.header, 0, sizeof(signal.header)); signal.header.theLength = 3; signal.header.theSendersSignalId = 0; signal.header.theSendersBlockRef = numberToRef(0, globalData.ownId); - signal.theData[0] = 
EventReport::ReceiveBytesStatistic; + signal.theData[0] = NDB_LE_ReceiveBytesStatistic; signal.theData[1] = nodeId; signal.theData[2] = (bytes/count); globalScheduler.execute(&signal, JBA, CMVMI, GSN_EVENT_REP); @@ -395,7 +442,8 @@ reportReceiveLen(void * callbackObj, void reportConnect(void * callbackObj, NodeId nodeId){ - Signal signal; + SignalT<1> signalT; + Signal &signal= *(Signal*)&signalT; memset(&signal.header, 0, sizeof(signal.header)); signal.header.theLength = 1; @@ -412,7 +460,10 @@ reportConnect(void * callbackObj, NodeId nodeId){ void reportDisconnect(void * callbackObj, NodeId nodeId, Uint32 errNo){ - Signal signal; + DBUG_ENTER("reportDisconnect"); + + SignalT<sizeof(DisconnectRep)/4> signalT; + Signal &signal= *(Signal*)&signalT; memset(&signal.header, 0, sizeof(signal.header)); signal.header.theLength = DisconnectRep::SignalLength; @@ -425,6 +476,8 @@ reportDisconnect(void * callbackObj, NodeId nodeId, Uint32 errNo){ rep->err = errNo; globalScheduler.execute(&signal, JBA, CMVMI, GSN_DISCONNECT_REP); + + DBUG_VOID_RETURN; } void @@ -452,3 +505,8 @@ SignalLoggerManager::printSegmentedSection(FILE * output, putc('\n', output); } +void +transporter_recv_from(void * callbackObj, NodeId nodeId){ + globalData.m_nodeInfo[nodeId].m_heartbeat_cnt= 0; + return; +} diff --git a/ndb/src/kernel/vm/VMSignal.hpp b/ndb/src/kernel/vm/VMSignal.hpp index 9111ee7949c..33f8a9f25c0 100644 --- a/ndb/src/kernel/vm/VMSignal.hpp +++ b/ndb/src/kernel/vm/VMSignal.hpp @@ -42,6 +42,16 @@ struct NodeReceiverGroup { NodeBitmask m_nodes; }; +template <unsigned T> struct SignalT +{ + SignalHeader header; + SegmentedSectionPtr m_sectionPtr[3]; + union { + Uint32 theData[T]; + Uint64 dummyAlign; + }; +}; + /** * class used for passing argumentes to blocks */ @@ -78,10 +88,16 @@ public: #define VMS_DATA_SIZE \ (MAX_ATTRIBUTES_IN_TABLE + MAX_TUPLE_SIZE_IN_WORDS + MAX_KEY_SIZE_IN_WORDS) +#if VMS_DATA_SIZE > 8192 +#error "VMSignal buffer is too small" +#endif + SignalHeader header; // 
28 bytes SegmentedSectionPtr m_sectionPtr[3]; - Uint32 theData[25+VMS_DATA_SIZE]; // 2048 32-bit words -> 8K Bytes - + union { + Uint32 theData[8192]; // 8192 32-bit words -> 32K Bytes + Uint64 dummyAlign; + }; void garbage_register(); }; diff --git a/ndb/src/kernel/vm/WatchDog.cpp b/ndb/src/kernel/vm/WatchDog.cpp index 23475a478d3..c80317e1725 100644 --- a/ndb/src/kernel/vm/WatchDog.cpp +++ b/ndb/src/kernel/vm/WatchDog.cpp @@ -95,39 +95,40 @@ WatchDog::run(){ globalData.incrementWatchDogCounter(0); alerts = 0; } else { + const char *last_stuck_action; alerts++; - ndbout << "Ndb kernel is stuck in: "; switch (oldIPValue) { case 1: - ndbout << "Job Handling" << endl; + last_stuck_action = "Job Handling"; break; case 2: - ndbout << "Scanning Timers" << endl; + last_stuck_action = "Scanning Timers"; break; case 3: - ndbout << "External I/O" << endl; + last_stuck_action = "External I/O"; break; case 4: - ndbout << "Print Job Buffers at crash" << endl; + last_stuck_action = "Print Job Buffers at crash"; break; case 5: - ndbout << "Checking connections" << endl; + last_stuck_action = "Checking connections"; break; case 6: - ndbout << "Performing Send" << endl; + last_stuck_action = "Performing Send"; break; case 7: - ndbout << "Polling for Receive" << endl; + last_stuck_action = "Polling for Receive"; break; case 8: - ndbout << "Performing Receive" << endl; + last_stuck_action = "Performing Receive"; break; default: - ndbout << "Unknown place" << endl; + last_stuck_action = "Unknown place"; break; }//switch + ndbout << "Ndb kernel is stuck in: " << last_stuck_action << endl; if(alerts == 3){ - shutdownSystem(); + shutdownSystem(last_stuck_action); } } } @@ -135,11 +136,10 @@ WatchDog::run(){ } void -WatchDog::shutdownSystem(){ +WatchDog::shutdownSystem(const char *last_stuck_action){ - ErrorReporter::handleError(ecError, - ERR_PROGRAMERROR, - "WatchDog terminate", + ErrorReporter::handleError(NDBD_EXIT_WATCHDOG_TERMINATE, + last_stuck_action, __FILE__, NST_Watchdog); } 
diff --git a/ndb/src/kernel/vm/WatchDog.hpp b/ndb/src/kernel/vm/WatchDog.hpp index 4b44b1a96a2..65b23dafdb1 100644 --- a/ndb/src/kernel/vm/WatchDog.hpp +++ b/ndb/src/kernel/vm/WatchDog.hpp @@ -50,7 +50,7 @@ private: bool theStop; void run(); - void shutdownSystem(); + void shutdownSystem(const char *last_stuck_action); }; #endif // WatchDog_H diff --git a/ndb/src/kernel/vm/ndbd_malloc.cpp b/ndb/src/kernel/vm/ndbd_malloc.cpp new file mode 100644 index 00000000000..4bfccf828fc --- /dev/null +++ b/ndb/src/kernel/vm/ndbd_malloc.cpp @@ -0,0 +1,63 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <ndb_global.h> +#include "ndbd_malloc.hpp" +#include <NdbMem.h> + +//#define TRACE_MALLOC +#ifdef TRACE_MALLOC +#include <stdio.h> +#endif + +static void xxx(size_t size, size_t *s_m, size_t *s_k, size_t *s_b) +{ + *s_m = size/1024/1024; + *s_k = (size - *s_m*1024*1024)/1024; + *s_b = size - *s_m*1024*1024-*s_k*1024; +} + +static Uint64 g_allocated_memory; +void *ndbd_malloc(size_t size) +{ + void *p = NdbMem_Allocate(size); + if (p) + { + g_allocated_memory += size; +#ifdef TRACE_MALLOC + { + size_t s_m, s_k, s_b; + xxx(size, &s_m, &s_k, &s_b); + fprintf(stderr, "%p malloc(%um %uk %ub)", p, s_m, s_k, s_b); + xxx(g_allocated_memory, &s_m, &s_k, &s_b); + fprintf(stderr, "\t\ttotal(%um %uk %ub)\n", s_m, s_k, s_b); + } +#endif + } + return p; +} + +void ndbd_free(void *p, size_t size) +{ + NdbMem_Free(p); + if (p) + { + g_allocated_memory -= size; +#ifdef TRACE_MALLOC + fprintf(stderr, "%p free(%d)\n", p, size); +#endif + } +} diff --git a/ndb/src/kernel/error/ErrorMessages.hpp b/ndb/src/kernel/vm/ndbd_malloc.hpp index 38c8eec636b..136e9f0c372 100644 --- a/ndb/src/kernel/error/ErrorMessages.hpp +++ b/ndb/src/kernel/vm/ndbd_malloc.hpp @@ -14,9 +14,13 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef ERROR_MESSAGES_H -#define ERROR_MESSAGES_H +#ifndef NDBD_MALLOC_H +#define NDBD_MALLOC_H -const char* lookupErrorMessage(int faultId); +/** + * common memory allocation function for ndbd kernel + */ +void *ndbd_malloc(size_t size); +void ndbd_free(void *p, size_t size); -#endif +#endif diff --git a/ndb/src/kernel/vm/pc.hpp b/ndb/src/kernel/vm/pc.hpp index 2d745d26b1c..95839c48e4e 100644 --- a/ndb/src/kernel/vm/pc.hpp +++ b/ndb/src/kernel/vm/pc.hpp @@ -90,7 +90,7 
@@
  * @param limit max no of records in rec
  * @param rec pointer to first record in an array of records
  */
-#define ptrCheckGuard(ptr, limit, rec) {\
+#define ptrCheckGuardErr(ptr, limit, rec, error) { /* 'error' replaces the hard-coded code in the progError call below */ \
   UintR TxxzLimit; \
   TxxzLimit = (limit); \
   UintR TxxxPtr; \
@@ -99,33 +99,39 @@
   if (TxxxPtr < (TxxzLimit)) { \
     ; \
   } else { \
-    progError(__LINE__, ERR_POINTER_NOTINRANGE, __FILE__); \
+    progError(__LINE__, error, __FILE__); \
 }}
-
 #define ptrAss(ptr, rec) ptr.p = &rec[ptr.i]
 #define ptrNull(ptr) ptr.p = NULL
-#define ptrGuard(ptr) if (ptr.p == NULL) \
-    progError(__LINE__, ERR_POINTER_NOTINRANGE, __FILE__)
-#define arrGuard(ind, size) if ((ind) >= (size)) \
-    progError(__LINE__, ERR_INDEX_NOTINRANGE, __FILE__)
+#define ptrGuardErr(ptr, error) if (ptr.p == NULL) /* calls progError with 'error' when ptr.p is NULL */ \
+    progError(__LINE__, error, __FILE__) /* NOTE(review): unbraced if -- an else written after the macro would attach to it */
+#define arrGuardErr(ind, size, error) if ((ind) >= (size)) /* calls progError with 'error' when ind >= size */ \
+    progError(__LINE__, error, __FILE__)
 #else
 #define ptrCheck(ptr, limit, rec) ptr.p = &rec[ptr.i]
-#define ptrCheckGuard(ptr, limit, rec) ptr.p = &rec[ptr.i]
+#define ptrCheckGuardErr(ptr, limit, rec, error) ptr.p = &rec[ptr.i] /* unchecked variant: limit and error are not used */
 #define ptrAss(ptr, rec) ptr.p = &rec[ptr.i]
 #define ptrNull(ptr) ptr.p = NULL
-#define ptrGuard(ptr)
-#define arrGuard(ind, size)
+#define ptrGuardErr(ptr, error) /* expands to nothing in this #else branch */
+#define arrGuardErr(ind, size, error) /* expands to nothing in this #else branch */
 #endif
 
+#define ptrCheckGuard(ptr, limit, rec) \
+  ptrCheckGuardErr(ptr, limit, rec, NDBD_EXIT_POINTER_NOTINRANGE)
+#define ptrGuard(ptr) ptrGuardErr(ptr, NDBD_EXIT_POINTER_NOTINRANGE)
+#define arrGuard(ind, size) arrGuardErr(ind, size, NDBD_EXIT_INDEX_NOTINRANGE)
+/* The three macros above keep the old names and forward to the *Err forms with a fixed NDBD_EXIT_* code. */
 // -------- ERROR INSERT MACROS -------
 #ifdef ERROR_INSERT
 #define ERROR_INSERT_VARIABLE UintR cerrorInsert
 #define ERROR_INSERTED(x) (cerrorInsert == (x))
+#define ERROR_INSERTED_CLEAR(x) (cerrorInsert == (x) ? (cerrorInsert = 0, true) : false) /* on a match, resets cerrorInsert to 0 and yields true */
 #define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x
 #define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0
 #else
 #define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used
 #define ERROR_INSERTED(x) false
+#define ERROR_INSERTED_CLEAR(x) false /* always false when ERROR_INSERT is not defined */
 #define SET_ERROR_INSERT_VALUE(x)
 #define CLEAR_ERROR_INSERT_VALUE
 #endif
@@ -197,34 +203,31 @@
 #define ndbassert(check) \
   if((check)){ \
   } else { \
-    progError(__LINE__, ERR_NDBREQUIRE, __FILE__); \
-  }
-
-#define ndbrequire(check) \
-  if((check)){ \
-  } else { \
-    progError(__LINE__, ERR_NDBREQUIRE, __FILE__); \
-  }
+    progError(__LINE__, NDBD_EXIT_NDBASSERT, __FILE__); \
+  }
 #else
 #define ndbassert(check)
+#endif /* the conditional now ends here; ndbrequireErr and ndbrequire below are defined once, outside it */
 
-#define ndbrequire(check) \
+#define ndbrequireErr(check, error) /* calls progError with 'error' when check is false */ \
   if((check)){ \
   } else { \
-    progError(__LINE__, ERR_NDBREQUIRE, __FILE__); \
-  }
-#endif
+    progError(__LINE__, error, __FILE__); \
+  }
+/* ndbrequire keeps its old name and forwards to ndbrequireErr with NDBD_EXIT_NDBREQUIRE. */
+#define ndbrequire(check) \
+  ndbrequireErr(check, NDBD_EXIT_NDBREQUIRE)
 
 #define CRASH_INSERTION(errorType) \
   if (!ERROR_INSERTED((errorType))) { \
   } else { \
-    progError(__LINE__, ERR_ERROR_INSERT, __FILE__); \
+    progError(__LINE__, NDBD_EXIT_ERROR_INSERT, __FILE__); \
   }
 
 #define CRASH_INSERTION2(errorNum, condition) \
   if (!(ERROR_INSERTED(errorNum) && condition)) { \
   } else { \
-    progError(__LINE__, ERR_ERROR_INSERT, __FILE__); \
+    progError(__LINE__, NDBD_EXIT_ERROR_INSERT, __FILE__); \
   }
 
 #define MEMCOPY_PAGE(to, from, page_size_in_bytes) \
diff --git a/ndb/src/kernel/vm/testSuperPool.cpp b/ndb/src/kernel/vm/testSuperPool.cpp
new file mode 100644
index 00000000000..194b3a43fa0
--- /dev/null
+++ b/ndb/src/kernel/vm/testSuperPool.cpp
@@ -0,0 +1,220 @@
+#if 0
+make -f Makefile -f - testSuperPool <<'_eof_'
+testSuperPool: testSuperPool.cpp libkernel.a
+	$(CXXCOMPILE) -o $@ $@.cpp libkernel.a -L../../common/util/.libs -lgeneral
+_eof_
+exit $?
+#endif
+
+/* Copyright (C) 2003 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "SuperPool.hpp"
+#include <NdbOut.hpp>
+// Test record of sz words: a[0 .. sz-2] hold data, a[sz-1] holds a shift-xor checksum of them.
+template <Uint32 sz>
+struct A {
+  Uint32 a[sz];
+  void fill() { // write random() values into the data words and store their checksum
+    Uint32 c = 0;
+    for (Uint32 i = 0; i + 1 < sz; i++) {
+      a[i] = random();
+      c = (c << 1) ^ a[i];
+    }
+    a[sz - 1] = c;
+  }
+  void check() { // recompute the checksum and assert that it equals the stored one
+    Uint32 c = 0;
+    for (Uint32 i = 0; i + 1 < sz; i++) {
+      c = (c << 1) ^ a[i];
+    }
+    assert(a[sz - 1] == c);
+  }
+};
+// Return random() reduced modulo n; n must be non-zero because the modulo divides by n.
+static Uint32
+urandom(Uint32 n)
+{
+  return (Uint32)random() % n;
+}
+// Pick at random a prime from a fixed table that does not divide n, retrying until one is found.
+static Uint32
+random_coprime(Uint32 n)
+{
+  Uint32 prime[] = { 101, 211, 307, 401, 503, 601, 701, 809, 907 };
+  Uint32 count = sizeof(prime) / sizeof(prime[0]);
+  while (1) {
+    Uint32 i = urandom(count);
+    if (n % prime[i] != 0)
+      return prime[i];
+  }
+}
+// qsort comparator: orders two Ptr values by their index member i.
+static int
+cmpPtrI(const void* a, const void* b)
+{
+  Ptr<const void> u = *(Ptr<const void>*)a;
+  Ptr<const void> v = *(Ptr<const void>*)b;
+  return u.i < v.i ? -1 : u.i > v.i ? +1 : 0;
+}
+// qsort comparator: orders two Ptr values by their pointer member p.
+static int
+cmpPtrP(const void* a, const void* b)
+{
+  Ptr<const void> u = *(Ptr<const void>*)a;
+  Ptr<const void> v = *(Ptr<const void>*)b;
+  return u.p < v.p ? -1 : u.p > v.p ? +1 : 0;
+}
+
+static Uint32 loopcount = 3; // outer iterations per sp_test call; also scales the random seize/release phase
+// Exercise a RecordPool of A<sz> on sp: seize everything, check uniqueness, release in varying order, then random churn.
+template <Uint32 sz>
+void
+sp_test(SuperPool& sp)
+{
+  typedef A<sz> T;
+  RecordPool<T> rp(sp);
+  SuperPool::RecInfo& ri = rp.m_recInfo;
+  Uint32 pageCount = sp.m_totalSize / sp.m_pageSize;
+  Uint32 perPage = rp.m_recInfo.m_maxUseCount;
+  Uint32 perPool = perPage * pageCount;
+  ndbout << "pages=" << pageCount << " perpage=" << perPage << " perpool=" << perPool << endl;
+  Ptr<T>* ptrList = new Ptr<T> [perPool];
+  memset(ptrList, 0x1f, perPool * sizeof(Ptr<T>)); // pre-fill every slot with 0x1f bytes
+  Uint32 loop;
+  for (loop = 0; loop < loopcount; loop++) {
+    ndbout << "loop " << loop << endl;
+    Uint32 i, j;
+    // seize all
+    ndbout << "seize all" << endl;
+    for (i = 0; i < perPool + 1; i++) { // one extra round: the seize past perPool is expected to fail (asserted below)
+      j = i;
+      sp.verify(ri);
+      Ptr<T> ptr1 = { 0, RNIL };
+      if (! rp.seize(ptr1))
+        break;
+      // write value
+      ptr1.p->fill();
+      ptr1.p->check();
+      // verify getPtr
+      Ptr<T> ptr2 = { 0, ptr1.i };
+      rp.getPtr(ptr2);
+      assert(ptr1.i == ptr2.i && ptr1.p == ptr2.p);
+      // save
+      ptrList[j] = ptr1;
+    }
+    assert(i == perPool);
+    assert(ri.m_totalUseCount == perPool && ri.m_totalRecCount == perPool);
+    sp.verify(ri);
+    // check duplicates
+    {
+      Ptr<T>* ptrList2 = new Ptr<T> [perPool];
+      memcpy(ptrList2, ptrList, perPool * sizeof(Ptr<T>));
+      qsort(ptrList2, perPool, sizeof(Ptr<T>), cmpPtrI);
+      for (i = 1; i < perPool; i++)
+        assert(ptrList2[i - 1].i != ptrList2[i].i);
+      qsort(ptrList2, perPool, sizeof(Ptr<T>), cmpPtrP);
+      for (i = 1; i < perPool; i++)
+        assert(ptrList2[i - 1].p != ptrList2[i].p);
+      delete [] ptrList2;
+    }
+    // release all in various orders
+    ndbout << "release all" << endl;
+    Uint32 coprime = random_coprime(perPool);
+    for (i = 0; i < perPool; i++) {
+      sp.verify(ri);
+      switch (loop % 3) {
+      case 0: // ascending
+        j = i;
+        break;
+      case 1: // descending
+        j = perPool - 1 - i;
+        break;
+      case 2: // pseudo-random
+        j = (coprime * i) % perPool; // stride by a prime not dividing perPool: each j occurs once (barring Uint32 overflow of the product)
+        break;
+      }
+      Ptr<T>& ptr = ptrList[j];
+      assert(ptr.i != RNIL && ptr.p != 0);
+      ptr.p->check();
+      rp.release(ptr);
+      assert(ptr.i == RNIL && ptr.p == 0);
+    }
+    sp.setCurrPage(ri, RNIL); // NOTE(review): presumably drops the current page so the counts below reach zero -- confirm
+    assert(ri.m_totalUseCount == 0 && ri.m_totalRecCount == 0);
+    sp.verify(ri);
+    // seize/release at random
+    ndbout << "seize/release at random" << endl;
+    for (i = 0; i < loopcount * perPool; i++) {
+      j = urandom(perPool);
+      Ptr<T>& ptr = ptrList[j];
+      if (ptr.i == RNIL) {
+        rp.seize(ptr); // NOTE(review): the result is not checked before ptr.p is dereferenced
+        ptr.p->fill();
+      } else {
+        ptr.p->check();
+        rp.release(ptr);
+      }
+    }
+    ndbout << "used " << ri.m_totalUseCount << endl;
+    sp.verify(ri);
+    // release all
+    ndbout << "release all" << endl;
+    for (i = 0; i < perPool; i++) {
+      j = i;
+      Ptr<T>& ptr = ptrList[j];
+      if (ptr.i != RNIL) {
+        ptr.p->check();
+        rp.release(ptr);
+      }
+    }
+    sp.setCurrPage(ri, RNIL);
+    assert(ri.m_totalUseCount == 0 && ri.m_totalRecCount == 0);
+    sp.verify(ri);
+  }
+  // done
+  delete [] ptrList;
+}
+// Pool geometry used by main(): pageCount * pageSize is passed to setSizes; pageSize equals 1 << pageBits.
+static Uint32 pageCount = 99;
+static Uint32 pageSize = 32768;
+static Uint32 pageBits = 15;
+// Record sizes, in Uint32 words, that sp_test is instantiated and run with.
+const Uint32 sz1 = 3, sz2 = 4, sz3 = 53, sz4 = 424, sz5 = 5353;
+// Explicit instantiations of sp_test for each record size.
+template void sp_test<sz1>(SuperPool& sp);
+template void sp_test<sz2>(SuperPool& sp);
+template void sp_test<sz3>(SuperPool& sp);
+template void sp_test<sz4>(SuperPool& sp);
+template void sp_test<sz5>(SuperPool& sp);
+// Build a HeapPool, seed random() from the pid (the seed is printed), then run sp_test for every size.
+int
+main()
+{
+  HeapPool sp(pageSize, pageBits);
+  sp.setSizes(pageCount * pageSize);
+  if (! sp.init())
+    assert(false); // NOTE(review): has no effect if asserts are compiled out
+  Uint16 s = (Uint16)getpid(); // only the low 16 bits of the pid are used as the seed
+  srandom(s);
+  ndbout << "rand " << s << endl;
+  sp_test<sz1>(sp);
+  sp_test<sz2>(sp);
+  sp_test<sz3>(sp);
+  sp_test<sz4>(sp);
+  sp_test<sz5>(sp);
+  return 0;
+}
|