diff options
Diffstat (limited to 'ndb/src')
27 files changed, 1012 insertions, 1173 deletions
diff --git a/ndb/src/common/debugger/signaldata/Makefile.am b/ndb/src/common/debugger/signaldata/Makefile.am index 0d6ed45dcef..0a5806e1e00 100644 --- a/ndb/src/common/debugger/signaldata/Makefile.am +++ b/ndb/src/common/debugger/signaldata/Makefile.am @@ -22,7 +22,7 @@ libsignaldataprint_la_SOURCES = \ CopyGCI.cpp SystemError.cpp StartRec.cpp NFCompleteRep.cpp \ FailRep.cpp DisconnectRep.cpp SignalDroppedRep.cpp \ SumaImpl.cpp NdbSttor.cpp CreateFragmentation.cpp \ - UtilLock.cpp TuxMaint.cpp TupAccess.cpp AccLock.cpp \ + UtilLock.cpp TuxMaint.cpp AccLock.cpp \ LqhTrans.cpp ReadNodesConf.cpp CntrStart.cpp include $(top_srcdir)/ndb/config/common.mk.am @@ -30,3 +30,4 @@ include $(top_srcdir)/ndb/config/type_ndbapi.mk.am # Don't update the files from bitkeeper %::SCCS/s.% + diff --git a/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp b/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp index 6227a9994d1..65351663789 100644 --- a/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp +++ b/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp @@ -73,7 +73,6 @@ #include <signaldata/CntrStart.hpp> #include <signaldata/ReadNodesConf.hpp> #include <signaldata/TuxMaint.hpp> -#include <signaldata/TupAccess.hpp> #include <signaldata/AccLock.hpp> bool printCONTINUEB(FILE *, const Uint32 *, Uint32, Uint16); @@ -249,9 +248,6 @@ SignalDataPrintFunctions[] = { ,{ GSN_READ_NODESCONF, printREAD_NODES_CONF } ,{ GSN_TUX_MAINT_REQ, printTUX_MAINT_REQ } - ,{ GSN_TUP_READ_ATTRS, printTUP_READ_ATTRS } - ,{ GSN_TUP_QUERY_TH, printTUP_QUERY_TH } - ,{ GSN_TUP_STORE_TH, printTUP_STORE_TH } ,{ GSN_ACC_LOCKREQ, printACC_LOCKREQ } ,{ GSN_LQH_TRANSCONF, printLQH_TRANSCONF } }; diff --git a/ndb/src/common/debugger/signaldata/SignalNames.cpp b/ndb/src/common/debugger/signaldata/SignalNames.cpp index bb492fa0411..9d4d5bdf6f5 100644 --- a/ndb/src/common/debugger/signaldata/SignalNames.cpp +++ b/ndb/src/common/debugger/signaldata/SignalNames.cpp @@ -640,9 +640,6 @@ const GsnName SignalNames [] = { ,{ GSN_TUX_MAINT_REQ, "TUX_MAINT_REQ" } ,{ GSN_TUX_MAINT_CONF, "TUX_MAINT_CONF" } ,{ GSN_TUX_MAINT_REF, "TUX_MAINT_REF" } - ,{ GSN_TUP_READ_ATTRS, "TUP_READ_ATTRS" } - ,{ GSN_TUP_QUERY_TH, "TUP_QUERY_TH" } - ,{ GSN_TUP_STORE_TH, "TUP_STORE_TH" } ,{ GSN_TUX_BOUND_INFO, "TUX_BOUND_INFO" } ,{ GSN_ACC_LOCKREQ, "ACC_LOCKREQ" } diff --git a/ndb/src/common/debugger/signaldata/TupAccess.cpp b/ndb/src/common/debugger/signaldata/TupAccess.cpp deleted file mode 100644 index e94d4636cf5..00000000000 --- a/ndb/src/common/debugger/signaldata/TupAccess.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include <signaldata/TupAccess.hpp> -#include <SignalLoggerManager.hpp> -#include <AttributeHeader.hpp> - -bool -printTUP_READ_ATTRS(FILE* output, const Uint32* theData, Uint32 len, Uint16 rbn) -{ - const TupReadAttrs* const sig = (const TupReadAttrs*)theData; - if (sig->errorCode == RNIL) - fprintf(output, " errorCode=RNIL flags=%x\n", sig->requestInfo); - else - fprintf(output, " errorCode=%u flags=%x\n", sig->errorCode, sig->requestInfo); - fprintf(output, " table: id=%u", sig->tableId); - fprintf(output, " fragment: id=%u ptr=0x%x\n", sig->fragId, sig->fragPtrI); - fprintf(output, " tuple: addr=0x%x version=%u", sig->tupAddr, sig->tupVersion); - fprintf(output, " realPage=0x%x offset=%u\n", sig->pageId, sig->pageOffset); - const Uint32* buffer = (const Uint32*)sig + TupReadAttrs::SignalLength; - Uint32 attrCount = buffer[0]; - bool readKeys = (sig->requestInfo & TupReadAttrs::ReadKeys); - if (sig->errorCode == RNIL && ! readKeys || - sig->errorCode == 0 && readKeys) { - fprintf(output, " input: attrCount=%u\n", attrCount); - for (unsigned i = 0; i < attrCount; i++) { - AttributeHeader ah(buffer[1 + i]); - fprintf(output, " %u: attrId=%u\n", i, ah.getAttributeId()); - } - } - if (sig->errorCode == 0) { - fprintf(output, " output: attrCount=%u\n", attrCount); - Uint32 pos = 1 + attrCount; - for (unsigned i = 0; i < attrCount; i++) { - AttributeHeader ah(buffer[pos++]); - fprintf(output, " %u: attrId=%u dataSize=%u\n", i, ah.getAttributeId(), ah.getDataSize()); - Uint32 next = pos + ah.getDataSize(); - Uint32 printpos = 0; - while (pos < next) { - SignalLoggerManager::printDataWord(output, printpos, buffer[pos]); - pos++; - } - if (ah.getDataSize() > 0) - fprintf(output, "\n"); - } - } - return true; -} - -bool -printTUP_QUERY_TH(FILE* output, const Uint32* theData, Uint32 len, Uint16 rbn) -{ - const TupQueryTh* const sig = (const TupQueryTh*)theData; - fprintf(output, "tableId = %u, fragId = %u ", sig->tableId, sig->fragId); - fprintf(output, "tuple: addr = 0x%x version = %u\n", sig->tupAddr, - sig->tupVersion); - fprintf(output, "transId1 = 0x%x, transId2 = 0x%x, savePointId = %u\n", - sig->transId1, sig->transId2, sig->savePointId); - return true; -} - -bool -printTUP_STORE_TH(FILE* output, const Uint32* theData, Uint32 len, Uint16 rbn) -{ - const TupStoreTh* const sig = (const TupStoreTh*)theData; - if (sig->errorCode == RNIL) - fprintf(output, " errorCode=RNIL\n"); - else - fprintf(output, " errorCode=%u\n", sig->errorCode); - fprintf(output, " table: id=%u", sig->tableId); - fprintf(output, " fragment: id=%u ptr=0x%x\n", sig->fragId, sig->fragPtrI); - fprintf(output, " tuple: addr=0x%x", sig->tupAddr); - if ((sig->tupAddr & 0x1) == 0) { - fprintf(output, " fragPage=0x%x index=%u", - sig->tupAddr >> MAX_TUPLES_BITS, - (sig->tupAddr & ((1 <<MAX_TUPLES_BITS) - 1)) >> 1); - fprintf(output, " realPage=0x%x offset=%u\n", sig->pageId, sig->pageOffset); - } else { - fprintf(output, " cacheId=%u\n", - sig->tupAddr >> 1); - } - if (sig->tupVersion != 0) { - fprintf(output, " version=%u ***invalid***\n", sig->tupVersion); - } - bool showdata = true; - switch (sig->opCode) { - case TupStoreTh::OpRead: - fprintf(output, " operation=Read\n"); - showdata = false; - break; - case TupStoreTh::OpInsert: - fprintf(output, " operation=Insert\n"); - break; - case TupStoreTh::OpUpdate: - fprintf(output, " operation=Update\n"); - break; - case TupStoreTh::OpDelete: - fprintf(output, " operation=Delete\n"); - showdata = false; - break; - default: - fprintf(output, " operation=%u ***invalid***\n", sig->opCode); - break; - } - fprintf(output, " data: offset=%u size=%u", sig->dataOffset, sig->dataSize); - if (! showdata) { - fprintf(output, " [not printed]\n"); - } else { - fprintf(output, "\n"); - const Uint32* buffer = (const Uint32*)sig + TupStoreTh::SignalLength; - Uint32 pos = 0; - while (pos < sig->dataSize) - SignalLoggerManager::printDataWord(output, pos, buffer[sig->dataOffset + pos]); - if (sig->dataSize > 0) - fprintf(output, "\n"); - } - return true; -}; diff --git a/ndb/src/cw/cpcd/Makefile.am b/ndb/src/cw/cpcd/Makefile.am index 6345bae9bbe..1f7b0d88448 100644 --- a/ndb/src/cw/cpcd/Makefile.am +++ b/ndb/src/cw/cpcd/Makefile.am @@ -1,5 +1,5 @@ -ndbtools_PROGRAMS = ndb_cpcd +ndbbin_PROGRAMS = ndb_cpcd ndb_cpcd_SOURCES = main.cpp CPCD.cpp Process.cpp APIService.cpp Monitor.cpp common.cpp diff --git a/ndb/src/cw/cpcd/Process.cpp b/ndb/src/cw/cpcd/Process.cpp index 28548818b31..a67dba95dc7 100644 --- a/ndb/src/cw/cpcd/Process.cpp +++ b/ndb/src/cw/cpcd/Process.cpp @@ -209,49 +209,45 @@ int set_ulimit(const BaseString & pair){ #ifdef HAVE_GETRLIMIT errno = 0; - do { - Vector<BaseString> list; - pair.split(list, ":"); - if(list.size() != 2){ - break; - } - - int res; - rlim_t value = RLIM_INFINITY; - if(!(list[1].trim() == "unlimited")){ - value = atoi(list[1].c_str()); - } - - struct rlimit rlp; + Vector<BaseString> list; + pair.split(list, ":"); + if(list.size() != 2){ + logger.error("Unable to process ulimit: split >%s< list.size()=%d", + pair.c_str(), list.size()); + return -1; + } + + int res; + rlim_t value = RLIM_INFINITY; + if(!(list[1].trim() == "unlimited")){ + value = atoi(list[1].c_str()); + } + + struct rlimit rlp; #define _RLIMIT_FIX(x) { res = getrlimit(x,&rlp); if(!res){ rlp.rlim_cur = value; res = setrlimit(x, &rlp); }} - - if(list[0].trim() == "c"){ - _RLIMIT_FIX(RLIMIT_CORE); - } else if(list[0] == "d"){ - _RLIMIT_FIX(RLIMIT_DATA); - } else if(list[0] == "f"){ - _RLIMIT_FIX(RLIMIT_FSIZE); - } else if(list[0] == "n"){ - _RLIMIT_FIX(RLIMIT_NOFILE); - } else if(list[0] == "s"){ - _RLIMIT_FIX(RLIMIT_STACK); - } else if(list[0] == "t"){ - _RLIMIT_FIX(RLIMIT_CPU); - } else { - errno = EINVAL; - break; - } - if(!res) - break; - - return 0; - } while(false); - logger.error("Unable to process ulimit: %s(%s)", - pair.c_str(), strerror(errno)); - return -1; -#else - return 0; // Maybe it's ok anyway... + + if(list[0].trim() == "c"){ + _RLIMIT_FIX(RLIMIT_CORE); + } else if(list[0] == "d"){ + _RLIMIT_FIX(RLIMIT_DATA); + } else if(list[0] == "f"){ + _RLIMIT_FIX(RLIMIT_FSIZE); + } else if(list[0] == "n"){ + _RLIMIT_FIX(RLIMIT_NOFILE); + } else if(list[0] == "s"){ + _RLIMIT_FIX(RLIMIT_STACK); + } else if(list[0] == "t"){ + _RLIMIT_FIX(RLIMIT_CPU); + } else { + errno = EINVAL; + } + if(res){ + logger.error("Unable to process ulimit: %s res=%d error=%d(%s)", + pair.c_str(), res, errno, strerror(errno)); + return -1; + } #endif + return 0; } void diff --git a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index 824f74c59af..e0994955818 100644 --- a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -410,7 +410,6 @@ */ class Dblqh: public SimulatedBlock { public: - enum LcpCloseState { LCP_IDLE = 0, LCP_RUNNING = 1, // LCP is running @@ -1990,7 +1989,6 @@ public: UintR nextTcLogQueue; UintR nextTc; UintR nextTcConnectrec; - Uint16 nodeAfterNext[2]; UintR prevHashRec; UintR prevLogTcrec; UintR prevTc; @@ -2027,6 +2025,7 @@ public: Uint16 nextReplica; Uint16 primKeyLen; Uint16 save1; + Uint16 nodeAfterNext[3]; Uint8 activeCreat; Uint8 apiVersionNo; diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 71e5aa5c0bc..eb8e2917a8e 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -3574,7 +3574,6 @@ void Dblqh::prepareContinueAfterBlockedLab(Signal* signal) key.scanNumber = KeyInfo20::getScanNo(regTcPtr->tcScanInfo); key.fragPtrI = fragptr.i; c_scanTakeOverHash.find(scanptr, key); - ndbassert(scanptr.i != RNIL); } if (scanptr.i == RNIL) { jam(); @@ -5995,10 +5994,15 @@ void Dblqh::abortStateHandlerLab(Signal* signal) break; case TcConnectionrec::STOPPED: jam(); -/* ------------------------------------------------------------------------- */ -/*WE ARE CURRENTLY QUEUED FOR ACCESS TO THE FRAGMENT BY A LOCAL CHECKPOINT. */ -/* ------------------------------------------------------------------------- */ + /* --------------------------------------------------------------------- + * WE ARE CURRENTLY QUEUED FOR ACCESS TO THE FRAGMENT BY A LCP + * Since nothing has been done, just release operation + * i.e. no prepare log record has been written + * so no abort log records needs to be written + */ releaseWaitQueue(signal); + continueAfterLogAbortWriteLab(signal); + return; break; case TcConnectionrec::WAIT_AI_AFTER_ABORT: jam(); @@ -9954,9 +9958,11 @@ void Dblqh::execLCP_HOLDOPCONF(Signal* signal) return; } else { jam(); + /* NO MORE HOLDOPS NEEDED */ lcpLocptr.p->lcpLocstate = LcpLocRecord::HOLDOP_READY; checkLcpHoldop(signal); + if (lcpPtr.p->lcpState == LcpRecord::LCP_WAIT_ACTIVE_FINISH) { if (fragptr.p->activeList == RNIL) { jam(); @@ -9974,6 +9980,7 @@ void Dblqh::execLCP_HOLDOPCONF(Signal* signal) }//if }//if }//if + /* ----------------------- */ /* ELSE */ /* ------------------------------------------------------------------------ @@ -10046,7 +10053,6 @@ void Dblqh::execTUP_LCPSTARTED(Signal* signal) void Dblqh::lcpStartedLab(Signal* signal) { checkLcpStarted(signal); - if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) { jam(); /* ---------------------------------------------------------------------- @@ -10065,7 +10071,7 @@ void Dblqh::lcpStartedLab(Signal* signal) sendAccContOp(signal); /* START OPERATIONS IN ACC */ moveAccActiveFrag(signal); /* MOVE FROM ACC BLOCKED LIST TO ACTIVE LIST ON FRAGMENT */ - }//if + } /*---------------*/ /* ELSE */ /*-------------------------------------------------------------------------*/ @@ -10126,32 +10132,27 @@ void Dblqh::execLQH_RESTART_OP(Signal* signal) lcpPtr.i = signal->theData[1]; ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord); - if (fragptr.p->fragStatus == Fragrecord::BLOCKED) { - if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) { - jam(); - /***********************************************************************/ - /* THIS SIGNAL CAN ONLY BE RECEIVED WHEN FRAGMENT IS BLOCKED AND - * THE LOCAL CHECKPOINT HAS BEEN STARTED. THE BLOCKING WILL BE - * REMOVED AS SOON AS ALL OPERATIONS HAVE BEEN STARTED. - ***********************************************************************/ - restartOperationsLab(signal); - return; - } else { - jam(); - if (lcpPtr.p->lcpState == LcpRecord::LCP_BLOCKED_COMP) { - jam(); - /*******************************************************************> - * THE CHECKPOINT IS COMPLETED BUT HAS NOT YET STARTED UP - * ALL OPERATIONS AGAIN. - * WE PERFORM THIS START-UP BEFORE CONTINUING WITH THE NEXT - * FRAGMENT OF THE LOCAL CHECKPOINT TO AVOID ANY STRANGE ERRORS. - *******************************************************************> */ - restartOperationsLab(signal); - return; - }//if - }//if - }//if - ndbrequire(false); + ndbrequire(fragptr.p->fragStatus == Fragrecord::BLOCKED); + if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) { + jam(); + /***********************************************************************/ + /* THIS SIGNAL CAN ONLY BE RECEIVED WHEN FRAGMENT IS BLOCKED AND + * THE LOCAL CHECKPOINT HAS BEEN STARTED. THE BLOCKING WILL BE + * REMOVED AS SOON AS ALL OPERATIONS HAVE BEEN STARTED. + ***********************************************************************/ + restartOperationsLab(signal); + } else if (lcpPtr.p->lcpState == LcpRecord::LCP_BLOCKED_COMP) { + jam(); + /*******************************************************************> + * THE CHECKPOINT IS COMPLETED BUT HAS NOT YET STARTED UP + * ALL OPERATIONS AGAIN. + * WE PERFORM THIS START-UP BEFORE CONTINUING WITH THE NEXT + * FRAGMENT OF THE LOCAL CHECKPOINT TO AVOID ANY STRANGE ERRORS. + *******************************************************************> */ + restartOperationsLab(signal); + } else { + ndbrequire(false); + } }//Dblqh::execLQH_RESTART_OP() void Dblqh::restartOperationsLab(Signal* signal) @@ -10204,13 +10205,13 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) * WHEN ARRIVING HERE THE OPERATION IS ALREADY SET IN THE ACTIVE LIST. * THUS WE CAN IMMEDIATELY CALL THE METHODS THAT EXECUTE FROM WHERE * THE OPERATION WAS STOPPED. - *------------------------------------------------------------------------- */ + *------------------------------------------------------------------------ */ switch (tcConnectptr.p->transactionState) { case TcConnectionrec::STOPPED: jam(); /*----------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND ACCKEYREQ - *----------------------------------------------------------------------- */ + *---------------------------------------------------------------------- */ prepareContinueAfterBlockedLab(signal); return; break; @@ -10218,7 +10219,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND ACC_COMMITREQ - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ releaseActiveFrag(signal); commitContinueAfterBlockedLab(signal); return; @@ -10227,7 +10228,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND ACC_ABORTREQ - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ abortContinueAfterBlockedLab(signal, true); return; break; @@ -10235,7 +10236,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING COPY FRAGMENT - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ continueCopyAfterBlockedLab(signal); return; break; @@ -10243,7 +10244,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING COPY FRAGMENT - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ continueFirstCopyAfterBlockedLab(signal); return; break; @@ -10251,7 +10252,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING SCAN - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED; continueFirstScanAfterBlockedLab(signal); return; @@ -10260,7 +10261,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING SCAN - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED; continueAfterCheckLcpStopBlocked(signal); return; @@ -10269,7 +10270,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING SCAN - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED; continueScanAfterBlockedLab(signal); return; @@ -10279,7 +10280,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING RELEASE * LOCKS IN SCAN - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED; continueScanReleaseAfterBlockedLab(signal); return; @@ -10288,7 +10289,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING CLOSE OF SCAN - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ continueCloseScanAfterBlockedLab(signal); return; break; @@ -10296,7 +10297,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal) jam(); /* ---------------------------------------------------------------------- * STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING CLOSE OF COPY - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ continueCloseCopyAfterBlockedLab(signal); return; break; @@ -10422,7 +10423,12 @@ void Dblqh::contChkpNextFragLab(Signal* signal) * ----------------------------------------------------------------------- */ if (fragptr.p->fragStatus == Fragrecord::BLOCKED) { jam(); + /** + * LCP of fragment complete + * but restarting of operations isn't + */ lcpPtr.p->lcpState = LcpRecord::LCP_BLOCKED_COMP; + //restartOperationsLab(signal); return; }//if @@ -10699,25 +10705,25 @@ void Dblqh::checkLcpStarted(Signal* signal) terrorCode = ZOK; clsLcpLocptr.i = lcpPtr.p->firstLcpLocAcc; + int i = 0; do { ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord); - if (clsLcpLocptr.p->lcpLocstate != LcpLocRecord::ACC_STARTED) { - ndbrequire((clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_COMPLETED) || - (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_WAIT_STARTED)); + if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_WAIT_STARTED){ return; }//if clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc; + i++; } while (clsLcpLocptr.i != RNIL); + i = 0; clsLcpLocptr.i = lcpPtr.p->firstLcpLocTup; do { ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord); - if (clsLcpLocptr.p->lcpLocstate != LcpLocRecord::TUP_STARTED) { - ndbrequire((clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_COMPLETED) || - (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_WAIT_STARTED)); + if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_WAIT_STARTED){ return; }//if clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc; + i++; } while (clsLcpLocptr.i != RNIL); lcpPtr.p->lcpState = LcpRecord::LCP_STARTED; }//Dblqh::checkLcpStarted() @@ -10875,18 +10881,28 @@ void Dblqh::sendAccContOp(Signal* signal) { LcpLocRecordPtr sacLcpLocptr; + int count = 0; sacLcpLocptr.i = lcpPtr.p->firstLcpLocAcc; do { ptrCheckGuard(sacLcpLocptr, clcpLocrecFileSize, lcpLocRecord); sacLcpLocptr.p->accContCounter = 0; -/* ------------------------------------------------------------------------- */ -/*SEND START OPERATIONS TO ACC AGAIN */ -/* ------------------------------------------------------------------------- */ - signal->theData[0] = lcpPtr.p->lcpAccptr; - signal->theData[1] = sacLcpLocptr.p->locFragid; - sendSignal(fragptr.p->accBlockref, GSN_ACC_CONTOPREQ, signal, 2, JBA); + if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_STARTED){ + /* ------------------------------------------------------------------- */ + /*SEND START OPERATIONS TO ACC AGAIN */ + /* ------------------------------------------------------------------- */ + signal->theData[0] = lcpPtr.p->lcpAccptr; + signal->theData[1] = sacLcpLocptr.p->locFragid; + sendSignal(fragptr.p->accBlockref, GSN_ACC_CONTOPREQ, signal, 2, JBA); + count++; + } else if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_COMPLETED){ + signal->theData[0] = sacLcpLocptr.i; + sendSignal(reference(), GSN_ACC_CONTOPCONF, signal, 1, JBB); + } else { + ndbrequire(false); + } sacLcpLocptr.i = sacLcpLocptr.p->nextLcpLoc; } while (sacLcpLocptr.i != RNIL); + }//Dblqh::sendAccContOp() /* ------------------------------------------------------------------------- */ diff --git a/ndb/src/kernel/blocks/dbtup/Dbtup.hpp b/ndb/src/kernel/blocks/dbtup/Dbtup.hpp index 825de4f6c2c..71af563599c 100644 --- a/ndb/src/kernel/blocks/dbtup/Dbtup.hpp +++ b/ndb/src/kernel/blocks/dbtup/Dbtup.hpp @@ -1014,9 +1014,15 @@ public: void tuxReadAttrs(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32 tupVersion, Uint32 numAttrs, const Uint32* attrIds, const Uint32** attrData); /* - * TUX reads primary key for md5 summing and when returning keyinfo. + * TUX reads primary key without headers into an array of words. Used + * for md5 summing and when returning keyinfo. */ - void tuxReadKeys(); // under construction + void tuxReadKeys(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* pkSize, Uint32* pkData); + + /* + * TUX checks if tuple is visible to scan. + */ + bool tuxQueryTh(Uint32 fragPtrI, Uint32 tupAddr, Uint32 tupVersion, Uint32 transId1, Uint32 transId2, Uint32 savePointId); private: BLOCK_DEFINES(Dbtup); @@ -1062,9 +1068,6 @@ private: void execTUP_WRITELOG_REQ(Signal* signal); // Ordered index related - void execTUP_READ_ATTRS(Signal* signal); - void execTUP_QUERY_TH(Signal* signal); - void execTUP_STORE_TH(Signal* signal); void execBUILDINDXREQ(Signal* signal); void buildIndex(Signal* signal, Uint32 buildPtrI); void buildIndexReply(Signal* signal, const BuildIndexRec* buildRec); diff --git a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp index 3b54817edb0..8133f70a803 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp @@ -132,9 +132,6 @@ Dbtup::Dbtup(const class Configuration & conf) addRecSignal(GSN_TUP_WRITELOG_REQ, &Dbtup::execTUP_WRITELOG_REQ); // Ordered index related - addRecSignal(GSN_TUP_READ_ATTRS, &Dbtup::execTUP_READ_ATTRS); - addRecSignal(GSN_TUP_QUERY_TH, &Dbtup::execTUP_QUERY_TH); - addRecSignal(GSN_TUP_STORE_TH, &Dbtup::execTUP_STORE_TH); addRecSignal(GSN_BUILDINDXREQ, &Dbtup::execBUILDINDXREQ); initData(); diff --git a/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp b/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp index f11de5238e2..e7a431f17de 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp @@ -22,7 +22,6 @@ #include <AttributeDescriptor.hpp> #include "AttributeOffset.hpp" #include <AttributeHeader.hpp> -#include <signaldata/TupAccess.hpp> #include <signaldata/TuxMaint.hpp> #define ljam() { jamLine(28000 + __LINE__); } @@ -152,10 +151,10 @@ Dbtup::tuxReadAttrs(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32 tu const Uint32* tupleHeader = &pagePtr.p->pageWord[pageOffset]; for (Uint32 i = 0; i < numAttrs; i++) { AttributeHeader ah(attrIds[i]); - Uint32 attrId = ah.getAttributeId(); - Uint32 index = tabDescriptor + (attrId << ZAD_LOG_SIZE); - Uint32 desc1 = tableDescriptor[index].tabDescr; - Uint32 desc2 = tableDescriptor[index + 1].tabDescr; + const Uint32 attrId = ah.getAttributeId(); + const Uint32 index = tabDescriptor + (attrId << ZAD_LOG_SIZE); + const Uint32 desc1 = tableDescriptor[index].tabDescr; + const Uint32 desc2 = tableDescriptor[index + 1].tabDescr; if (AttributeDescriptor::getNullable(desc1)) { Uint32 offset = AttributeOffset::getNullFlagOffset(desc2); ndbrequire(offset < tablePtr.p->tupNullWords); @@ -171,275 +170,78 @@ Dbtup::tuxReadAttrs(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32 tu } } -void // under construction -Dbtup::tuxReadKeys() -{ -} - -// deprecated signal interfaces - void -Dbtup::execTUP_READ_ATTRS(Signal* signal) +Dbtup::tuxReadKeys(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* pkSize, Uint32* pkData) { ljamEntry(); - TupReadAttrs* const sig = (TupReadAttrs*)signal->getDataPtrSend(); - TupReadAttrs reqCopy = *sig; - TupReadAttrs* const req = &reqCopy; - req->errorCode = 0; - // get table + FragrecordPtr fragPtr; + fragPtr.i = fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); TablerecPtr tablePtr; - tablePtr.i = req->tableId; + tablePtr.i = fragPtr.p->fragTableId; ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); - // get fragment - FragrecordPtr fragPtr; - if (req->fragPtrI == RNIL) { - ljam(); - getFragmentrec(fragPtr, req->fragId, tablePtr.p); - ndbrequire(fragPtr.i != RNIL); - req->fragPtrI = fragPtr.i; - } else { - fragPtr.i = req->fragPtrI; - ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); - ndbrequire(req->fragId == fragPtr.p->fragmentId); - } - // get page PagePtr pagePtr; - if (req->pageId == RNIL) { - ljam(); - Uint32 fragPageId = req->tupAddr >> MAX_TUPLES_BITS; - Uint32 pageIndex = req->tupAddr & ((1 << MAX_TUPLES_BITS ) - 1); - ndbrequire((pageIndex & 0x1) == 0); - // data returned for original tuple - req->pageId = getRealpid(fragPtr.p, fragPageId); - req->pageOffset = ZPAGE_HEADER_SIZE + (pageIndex >> 1) * tablePtr.p->tupheadsize; - } - pagePtr.i = req->pageId; + pagePtr.i = pageId; ptrCheckGuard(pagePtr, cnoOfPage, page); - Uint32 pageOffset = req->pageOffset; - // search for tuple version if not original - if (! (req->requestInfo & TupReadAttrs::ReadKeys) && - pagePtr.p->pageWord[pageOffset + 1] != req->tupVersion) { - ljam(); - OperationrecPtr opPtr; - opPtr.i = pagePtr.p->pageWord[pageOffset]; - Uint32 loopGuard = 0; - while (true) { - ptrCheckGuard(opPtr, cnoOfOprec, operationrec); - if (opPtr.p->realPageIdC != RNIL) { - pagePtr.i = opPtr.p->realPageIdC; - pageOffset = opPtr.p->pageOffsetC; - ptrCheckGuard(pagePtr, cnoOfPage, page); - if (pagePtr.p->pageWord[pageOffset + 1] == req->tupVersion) { - ljam(); - break; - } - } - ljam(); - // next means before in event order - opPtr.i = opPtr.p->nextActiveOp; - ndbrequire(++loopGuard < (1 << ZTUP_VERSION_BITS)); + const Uint32 tabDescriptor = tablePtr.p->tabDescriptor; + const Uint32 numAttrs = tablePtr.p->noOfKeyAttr; + const Uint32* attrIds = &tableDescriptor[tablePtr.p->readKeyArray].tabDescr; + const Uint32* tupleHeader = &pagePtr.p->pageWord[pageOffset]; + Uint32 size = 0; + for (Uint32 i = 0; i < numAttrs; i++) { + AttributeHeader ah(attrIds[i]); + const Uint32 attrId = ah.getAttributeId(); + const Uint32 index = tabDescriptor + (attrId << ZAD_LOG_SIZE); + const Uint32 desc1 = tableDescriptor[index].tabDescr; + const Uint32 desc2 = tableDescriptor[index + 1].tabDescr; + ndbrequire(! AttributeDescriptor::getNullable(desc1)); + const Uint32 attrSize = AttributeDescriptor::getSizeInWords(desc1); + const Uint32* attrData = tupleHeader + AttributeOffset::getOffset(desc2); + for (Uint32 j = 0; j < attrSize; j++) { + pkData[size + j] = attrData[j]; } + size += attrSize; } - // shared buffer - Uint32* buffer = (Uint32*)sig + TupReadAttrs::SignalLength; - // if request is for keys then we create input section - if (req->requestInfo & TupReadAttrs::ReadKeys) { - ljam(); - buffer[0] = tablePtr.p->noOfKeyAttr; - const Uint32* keyArray = &tableDescriptor[tablePtr.p->readKeyArray].tabDescr; - MEMCOPY_NO_WORDS(&buffer[1], keyArray, tablePtr.p->noOfKeyAttr); - } - Uint32 inBufLen = buffer[0]; - Uint32* inBuffer = &buffer[1]; - Uint32* outBuffer = &buffer[1 + inBufLen]; - Uint32 maxRead = ZATTR_BUFFER_SIZE; - // save globals - TablerecPtr tabptr_old = tabptr; - FragrecordPtr fragptr_old = fragptr; - OperationrecPtr operPtr_old = operPtr; - // new globals - tabptr = tablePtr; - fragptr = fragPtr; - operPtr.i = RNIL; // XXX check later - operPtr.p = NULL; - int ret = readAttributes(pagePtr.p, pageOffset, inBuffer, inBufLen, outBuffer, maxRead); - // restore globals - tabptr = tabptr_old; - fragptr = fragptr_old; - operPtr = operPtr_old; - // check error - if ((Uint32)ret == (Uint32)-1) { - ljam(); - req->errorCode = terrorCode; - } - // copy back - *sig = *req; + *pkSize = size; } -void -Dbtup::execTUP_QUERY_TH(Signal* signal) +bool +Dbtup::tuxQueryTh(Uint32 fragPtrI, Uint32 tupAddr, Uint32 tupVersion, Uint32 transId1, Uint32 transId2, Uint32 savePointId) { ljamEntry(); - Operationrec tempOp; - TupQueryTh* const req = (TupQueryTh*)signal->getDataPtrSend(); - Uint32 tableId = req->tableId; - Uint32 fragId = req->fragId; - Uint32 tupAddr = req->tupAddr; - Uint32 req_tupVersion = req->tupVersion; - Uint32 transid1 = req->transId1; - Uint32 transid2 = req->transId2; - Uint32 savePointId = req->savePointId; - Uint32 ret_result = 0; - // get table + FragrecordPtr fragPtr; + fragPtr.i = fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); TablerecPtr tablePtr; - tablePtr.i = tableId; + tablePtr.i = fragPtr.p->fragTableId; ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); - // get fragment - FragrecordPtr fragPtr; - getFragmentrec(fragPtr, fragId, tablePtr.p); - ndbrequire(fragPtr.i != RNIL); // get page PagePtr pagePtr; Uint32 fragPageId = tupAddr >> MAX_TUPLES_BITS; Uint32 pageIndex = tupAddr & ((1 << MAX_TUPLES_BITS ) - 1); - + // use temp op rec + Operationrec tempOp; tempOp.fragPageId = fragPageId; tempOp.pageIndex = pageIndex; - tempOp.transid1 = transid1; - tempOp.transid2 = transid2; + tempOp.transid1 = transId1; + tempOp.transid2 = transId2; tempOp.savePointId = savePointId; tempOp.optype = ZREAD; tempOp.dirtyOp = 1; if (getPage(pagePtr, &tempOp, fragPtr.p, tablePtr.p)) { /* - We use the normal getPage which will return the tuple to be used - for this transaction and savepoint id. If its tuple version equals - the requested then we have a visible tuple otherwise not. + * We use the normal getPage which will return the tuple to be used + * for this transaction and savepoint id. If its tuple version + * equals the requested then we have a visible tuple otherwise not. */ ljam(); Uint32 read_tupVersion = pagePtr.p->pageWord[tempOp.pageOffset + 1]; - if (read_tupVersion == req_tupVersion) { + if (read_tupVersion == tupVersion) { ljam(); - ret_result = 1; - } - } - req->returnCode = ret_result; - return; -} - -void -Dbtup::execTUP_STORE_TH(Signal* signal) -{ - ljamEntry(); - TupStoreTh* const sig = (TupStoreTh*)signal->getDataPtrSend(); - TupStoreTh reqCopy = *sig; - TupStoreTh* const req = &reqCopy; - req->errorCode = 0; - ndbrequire(req->tupVersion == 0); - // get table - TablerecPtr tablePtr; - tablePtr.i = req->tableId; - ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); - // offset to attribute 0 - Uint32 attrDescIndex = tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE); - Uint32 attrDataOffset = AttributeOffset::getOffset(tableDescriptor[attrDescIndex + 1].tabDescr); - // get fragment - FragrecordPtr fragPtr; - if (req->fragPtrI == RNIL) { - ljam(); - getFragmentrec(fragPtr, req->fragId, tablePtr.p); - ndbrequire(fragPtr.i != RNIL); - req->fragPtrI = fragPtr.i; - } else { - fragPtr.i = req->fragPtrI; - ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); - ndbrequire(req->fragId == fragPtr.p->fragmentId); - } - // handle each case - switch (req->opCode) { - case TupStoreTh::OpRead: - ljam(); - { - PagePtr pagePtr; - if (req->pageId == RNIL) { - ljam(); - Uint32 fragPageId = req->tupAddr >> MAX_TUPLES_BITS; - Uint32 pageIndex = req->tupAddr & ((1 << MAX_TUPLES_BITS ) - 1); - ndbrequire((pageIndex & 0x1) == 0); - req->pageId = getRealpid(fragPtr.p, fragPageId); - req->pageOffset = ZPAGE_HEADER_SIZE + (pageIndex >> 1) * tablePtr.p->tupheadsize; - } - pagePtr.i = req->pageId; - ptrCheckGuard(pagePtr, cnoOfPage, page); - Uint32* data = &pagePtr.p->pageWord[req->pageOffset] + attrDataOffset; - Uint32* buffer = (Uint32*)sig + TupStoreTh::SignalLength; - ndbrequire(req->dataOffset + req->dataSize <= tablePtr.p->tupheadsize); - memcpy(buffer + req->dataOffset, data + req->dataOffset, req->dataSize << 2); - } - break; - case TupStoreTh::OpInsert: - ljam(); - { - PagePtr pagePtr; - if (! allocTh(fragPtr.p, tablePtr.p, NORMAL_PAGE, signal, req->pageOffset, pagePtr)) { - ljam(); - req->errorCode = terrorCode; - break; - } - req->pageId = pagePtr.i; - Uint32 fragPageId = pagePtr.p->pageWord[ZPAGE_FRAG_PAGE_ID_POS]; - Uint32 pageIndex = ((req->pageOffset - ZPAGE_HEADER_SIZE) / tablePtr.p->tupheadsize) << 1; - req->tupAddr = (fragPageId << MAX_TUPLES_BITS) | pageIndex; - ndbrequire(req->dataOffset + req->dataSize <= tablePtr.p->tupheadsize); - Uint32* data = &pagePtr.p->pageWord[req->pageOffset] + attrDataOffset; - Uint32* buffer = (Uint32*)sig + TupStoreTh::SignalLength; - memcpy(data + req->dataOffset, buffer + req->dataOffset, req->dataSize << 2); + return true; } - break; - case TupStoreTh::OpUpdate: - ljam(); - { - PagePtr pagePtr; - if (req->pageId == RNIL) { - ljam(); - Uint32 fragPageId = req->tupAddr >> MAX_TUPLES_BITS; - Uint32 pageIndex = req->tupAddr & ((1 << MAX_TUPLES_BITS ) - 1); - ndbrequire((pageIndex & 0x1) == 0); - req->pageId = getRealpid(fragPtr.p, fragPageId); - req->pageOffset = ZPAGE_HEADER_SIZE + (pageIndex >> 1) * tablePtr.p->tupheadsize; - } - pagePtr.i = req->pageId; - ptrCheckGuard(pagePtr, cnoOfPage, page); - Uint32* data = &pagePtr.p->pageWord[req->pageOffset] + attrDataOffset; - Uint32* buffer = (Uint32*)sig + TupStoreTh::SignalLength; - ndbrequire(req->dataOffset + req->dataSize <= tablePtr.p->tupheadsize); - memcpy(data + req->dataOffset, buffer + req->dataOffset, req->dataSize << 2); - } - break; - case TupStoreTh::OpDelete: - ljam(); - { - PagePtr pagePtr; - if (req->pageId == RNIL) { - ljam(); - Uint32 fragPageId = req->tupAddr >> MAX_TUPLES_BITS; - Uint32 pageIndex = req->tupAddr & ((1 << MAX_TUPLES_BITS ) - 1); - ndbrequire((pageIndex & 0x1) == 0); - req->pageId = getRealpid(fragPtr.p, fragPageId); - req->pageOffset = ZPAGE_HEADER_SIZE + (pageIndex >> 1) * tablePtr.p->tupheadsize; - } - pagePtr.i = req->pageId; - ptrCheckGuard(pagePtr, cnoOfPage, page); - freeTh(fragPtr.p, tablePtr.p, signal, pagePtr.p, req->pageOffset); - // null location - req->tupAddr = (Uint32)-1; - req->pageId = RNIL; - req->pageOffset = 0; - } - break; } - // copy back - *sig = *req; + return false; } // ordered index build diff --git a/ndb/src/kernel/blocks/dbtux/Dbtux.hpp b/ndb/src/kernel/blocks/dbtux/Dbtux.hpp index 62f47af94bd..1a3c7f64ac3 100644 --- a/ndb/src/kernel/blocks/dbtux/Dbtux.hpp +++ b/ndb/src/kernel/blocks/dbtux/Dbtux.hpp @@ -37,7 +37,6 @@ #include <signaldata/AlterIndx.hpp> #include <signaldata/DropTab.hpp> #include <signaldata/TuxMaint.hpp> -#include <signaldata/TupAccess.hpp> #include <signaldata/AccScan.hpp> #include <signaldata/TuxBound.hpp> #include <signaldata/NextScan.hpp> @@ -77,10 +76,14 @@ #define jam() jamLine(60000 + __LINE__) #define jamEntry() jamEntryLine(60000 + __LINE__) #endif -#ifdef DBTUX_CMP_CPP +#ifdef DBTUX_SEARCH_CPP #define jam() jamLine(70000 + __LINE__) #define jamEntry() jamEntryLine(70000 + __LINE__) #endif +#ifdef DBTUX_CMP_CPP +#define jam() jamLine(80000 + __LINE__) +#define jamEntry() jamEntryLine(80000 + __LINE__) +#endif #ifdef DBTUX_DEBUG_CPP #define jam() jamLine(90000 + __LINE__) #define jamEntry() jamEntryLine(90000 + __LINE__) @@ -112,6 +115,7 @@ public: static const unsigned DescPageSize = 256; private: static const unsigned MaxTreeNodeSize = MAX_TTREE_NODE_SIZE; + static const unsigned MaxPrefSize = MAX_TTREE_PREF_SIZE; static const unsigned ScanBoundSegmentSize = 7; static const unsigned MaxAccLockOps = MAX_PARALLEL_OP_PER_SCAN; BLOCK_DEFINES(Dbtux); @@ -206,19 +210,19 @@ private: unsigned m_fragBit : 1; // which duplicated table fragment TreeEnt(); // methods + bool eq(const TreeEnt ent) const; int cmp(const TreeEnt ent) const; }; static const unsigned TreeEntSize = sizeof(TreeEnt) >> 2; static const TreeEnt NullTreeEnt; /* - * Tree node has 1) fixed part 2) actual table data for min and max - * prefix 3) max and min entries 4) rest of entries 5) one extra entry + * Tree node has 1) fixed part 2) a prefix of index key data for min + * entry 3) max and min entries 4) rest of entries 5) one extra entry * used as work space. * * struct TreeNode part 1, size 6 words * min prefix part 2, size TreeHead::m_prefSize - * max prefix part 2, size TreeHead::m_prefSize * max entry part 3 * min entry part 3 * rest of entries part 4 @@ -265,14 +269,14 @@ private: friend struct TreeHead; struct TreeHead { Uint8 m_nodeSize; // words in tree node - Uint8 m_prefSize; // words in min/max prefix each + Uint8 m_prefSize; // words in min prefix Uint8 m_minOccup; // min entries in internal node Uint8 m_maxOccup; // max entries in node TupLoc m_root; // root node TreeHead(); // methods unsigned getSize(AccSize acc) const; - Data getPref(TreeNode* node, unsigned i) const; + Data getPref(TreeNode* node) const; TreeEnt* getEntList(TreeNode* node) const; }; @@ -442,6 +446,7 @@ private: Uint32 m_descPage; // descriptor page Uint16 m_descOff; // offset within the page Uint16 m_numAttrs; + bool m_storeNullKey; union { Uint32 nextPool; }; @@ -465,6 +470,7 @@ private: Uint32 m_descPage; // copy from index level Uint16 m_descOff; Uint16 m_numAttrs; + bool m_storeNullKey; TreeHead m_tree; TupLoc m_freeLoc; // one node pre-allocated for insert DLList<ScanOp> m_scanList; // current scans on this fragment @@ -514,6 +520,8 @@ private: NodeHandle(Frag& frag); NodeHandle(const NodeHandle& node); NodeHandle& operator=(const NodeHandle& node); + // check if unassigned + bool isNull(); // getters TupLoc getLink(unsigned i); unsigned getChilds(); // cannot spell @@ -528,56 +536,13 @@ private: void setBalance(int b); void setNodeScan(Uint32 scanPtrI); // access other parts of the node - Data getPref(unsigned i); + Data getPref(); TreeEnt getEnt(unsigned pos); TreeEnt getMinMax(unsigned i); // for ndbrequire and ndbassert void progError(int line, int cause, const char* file); }; - // parameters for methods - - /* - * Copy attribute data. - */ - struct CopyPar { - unsigned m_items; // number of attributes - bool m_headers; // copy headers flag (default true) - unsigned m_maxwords; // limit size (default no limit) - // output - unsigned m_numitems; // number of attributes fully copied - unsigned m_numwords; // number of words copied - CopyPar(); - }; - - /* - * Read index key attributes. - */ - struct ReadPar; - friend struct ReadPar; - struct ReadPar { - TreeEnt m_ent; // tuple to read - unsigned m_first; // first index attribute - unsigned m_count; // number of consecutive index attributes - Data m_data; // set pointer if 0 else copy result to it - unsigned m_size; // number of words (set in read keys only) - ReadPar(); - }; - - /* - * Scan bound comparison. - */ - struct BoundPar; - friend struct BoundPar; - struct BoundPar { - ConstData m_data1; // full bound data - ConstData m_data2; // full or prefix data - unsigned m_count1; // number of bounds - unsigned m_len2; // words in data2 buffer - unsigned m_dir; // 0-lower bound 1-upper bound - BoundPar(); - }; - // methods /* @@ -589,7 +554,7 @@ private: // utils void setKeyAttrs(const Frag& frag); void readKeyAttrs(const Frag& frag, TreeEnt ent, unsigned start, TableData keyData); - void copyAttrs(Data dst, ConstData src, CopyPar& copyPar); + void readTablePk(const Frag& frag, TreeEnt ent, unsigned& pkSize, Data pkData); void copyAttrs(const Frag& frag, TableData data1, Data data2, unsigned maxlen2 = MaxAttrDataSize); /* @@ -607,8 +572,6 @@ private: * DbtuxMaint.cpp */ void execTUX_MAINT_REQ(Signal* signal); - void tupReadAttrs(Signal* signal, const Frag& frag, ReadPar& readPar); - void tupReadKeys(Signal* signal, const Frag& frag, ReadPar& readPar); /* * DbtuxNode.cpp @@ -618,7 +581,7 @@ private: void selectNode(Signal* signal, NodeHandle& node, TupLoc loc, AccSize acc); void insertNode(Signal* signal, NodeHandle& node, AccSize acc); void deleteNode(Signal* signal, NodeHandle& node); - void setNodePref(Signal* signal, NodeHandle& node, unsigned i); + void setNodePref(Signal* signal, NodeHandle& node); // node operations void nodePushUp(Signal* signal, NodeHandle& node, unsigned pos, const TreeEnt& ent); void nodePopDown(Signal* signal, NodeHandle& node, unsigned pos, TreeEnt& ent); @@ -633,7 +596,6 @@ private: /* * DbtuxTree.cpp */ - void treeSearch(Signal* signal, Frag& frag, TableData searchKey, TreeEnt searchEnt, TreePos& treePos); void treeAdd(Signal* signal, Frag& frag, TreePos treePos, TreeEnt ent); void treeRemove(Signal* signal, Frag& frag, TreePos treePos); void treeRotateSingle(Signal* signal, Frag& frag, NodeHandle& node, unsigned i); @@ -658,11 +620,19 @@ private: void releaseScanOp(ScanOpPtr& scanPtr); /* + * DbtuxSearch.cpp + */ + void searchToAdd(Signal* signal, Frag& frag, TableData searchKey, TreeEnt searchEnt, TreePos& treePos); + void searchToRemove(Signal* signal, Frag& frag, TableData searchKey, TreeEnt searchEnt, TreePos& treePos); + void searchToScan(Signal* signal, Frag& frag, ConstData boundInfo, unsigned boundCount, TreePos& treePos); + + /* * DbtuxCmp.cpp */ - int cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, ConstData data2, unsigned maxlen2 = MaxAttrDataSize); - int cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, TableData data2); - int cmpScanBound(const Frag& frag, const BoundPar boundPar); + int cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, ConstData entryData, unsigned maxlen = MaxAttrDataSize); + int cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, TableData entryKey); + int cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigned boundCount, ConstData entryData, unsigned maxlen = MaxAttrDataSize); + int cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigned boundCount, TableData entryKey); /* * DbtuxDebug.cpp @@ -675,6 +645,7 @@ private: TupLoc m_parent; // expected parent address int m_depth; // returned depth unsigned m_occup; // returned occupancy + TreeEnt m_minmax[2]; // returned subtree min and max bool m_ok; // returned status PrintPar(); }; @@ -699,6 +670,8 @@ private: DebugTree = 4, // log and check tree after each op DebugScan = 8 // log scans }; + static const int DataFillByte = 0xa2; + static const int NodeFillByte = 0xa4; #endif // start up info @@ -859,13 +832,18 @@ Dbtux::TreeEnt::TreeEnt() : { } +inline bool +Dbtux::TreeEnt::eq(const TreeEnt ent) const +{ + return + m_tupLoc == ent.m_tupLoc && + m_tupVersion == ent.m_tupVersion && + m_fragBit == ent.m_fragBit; +} + inline int Dbtux::TreeEnt::cmp(const TreeEnt ent) const { - if (m_fragBit < ent.m_fragBit) - return -1; - if (m_fragBit > ent.m_fragBit) - return +1; if (m_tupLoc.m_pageId < ent.m_tupLoc.m_pageId) return -1; if (m_tupLoc.m_pageId > ent.m_tupLoc.m_pageId) @@ -878,6 +856,10 @@ Dbtux::TreeEnt::cmp(const TreeEnt ent) const return -1; if (m_tupVersion > ent.m_tupVersion) return +1; + if (m_fragBit < ent.m_fragBit) + return -1; + if (m_fragBit > ent.m_fragBit) + return +1; return 0; } @@ -920,7 +902,7 @@ Dbtux::TreeHead::getSize(AccSize acc) const case AccHead: return NodeHeadSize; case AccPref: - return NodeHeadSize + 2 * m_prefSize + 2 * TreeEntSize; + return NodeHeadSize + m_prefSize + 2 * TreeEntSize; case AccFull: return m_nodeSize; } @@ -929,16 +911,16 @@ Dbtux::TreeHead::getSize(AccSize acc) const } inline Dbtux::Data -Dbtux::TreeHead::getPref(TreeNode* node, unsigned i) const +Dbtux::TreeHead::getPref(TreeNode* node) const { - Uint32* ptr = (Uint32*)node + NodeHeadSize + i * m_prefSize; + Uint32* ptr = (Uint32*)node + NodeHeadSize; return ptr; } inline Dbtux::TreeEnt* Dbtux::TreeHead::getEntList(TreeNode* node) const { - Uint32* ptr = (Uint32*)node + NodeHeadSize + 2 * m_prefSize; + Uint32* ptr = (Uint32*)node + NodeHeadSize + m_prefSize; return (TreeEnt*)ptr; } @@ -1013,7 +995,8 @@ Dbtux::Index::Index() : m_numFrags(0), m_descPage(RNIL), m_descOff(0), - m_numAttrs(0) + m_numAttrs(0), + m_storeNullKey(false) { for (unsigned i = 0; i < MaxIndexFragments; i++) { m_fragId[i] = ZNIL; @@ -1032,6 +1015,7 @@ Dbtux::Frag::Frag(ArrayPool<ScanOp>& scanOpPool) : m_descPage(RNIL), m_descOff(0), m_numAttrs(ZNIL), + m_storeNullKey(false), m_tree(), m_freeLoc(), m_scanList(scanOpPool), @@ -1087,6 +1071,12 @@ Dbtux::NodeHandle::operator=(const NodeHandle& node) return *this; } +inline bool +Dbtux::NodeHandle::isNull() +{ + return m_node == 0; +} + inline Dbtux::TupLoc Dbtux::NodeHandle::getLink(unsigned i) { @@ -1161,11 +1151,11 @@ Dbtux::NodeHandle::setNodeScan(Uint32 scanPtrI) } inline Dbtux::Data -Dbtux::NodeHandle::getPref(unsigned i) +Dbtux::NodeHandle::getPref() { TreeHead& tree = m_frag.m_tree; - ndbrequire(m_acc >= AccPref && i <= 1); - return tree.getPref(m_node, i); + ndbrequire(m_acc >= AccPref); + return tree.getPref(m_node); } inline Dbtux::TreeEnt @@ -1193,36 +1183,6 @@ Dbtux::NodeHandle::getMinMax(unsigned i) // parameters for methods -inline -Dbtux::CopyPar::CopyPar() : - m_items(0), - m_headers(true), - m_maxwords(~0), // max unsigned - // output - m_numitems(0), - m_numwords(0) -{ -} - -inline -Dbtux::ReadPar::ReadPar() : - m_first(0), - m_count(0), - m_data(0), - m_size(0) -{ -} - -inline -Dbtux::BoundPar::BoundPar() : - m_data1(0), - m_data2(0), - m_count1(0), - m_len2(0), - m_dir(255) -{ -} - #ifdef VM_TRACE inline Dbtux::PrintPar::PrintPar() : diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp index 7601a14a242..1b8755a1dc4 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxCmp.cpp @@ -25,14 +25,14 @@ * prefix may be partial in which case CmpUnknown may be returned. */ int -Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, ConstData data2, unsigned maxlen2) +Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, ConstData entryData, unsigned maxlen) { const unsigned numAttrs = frag.m_numAttrs; const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); // number of words of attribute data left - unsigned len2 = maxlen2; + unsigned len2 = maxlen; // skip to right position in search key - data1 += start; + searchKey += start; int ret = 0; while (start < numAttrs) { if (len2 < AttributeHeaderSize) { @@ -41,20 +41,20 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, ConstDat break; } len2 -= AttributeHeaderSize; - if (*data1 != 0) { - if (! data2.ah().isNULL()) { + if (*searchKey != 0) { + if (! entryData.ah().isNULL()) { jam(); // current attribute const DescAttr& descAttr = descEnt.m_descAttr[start]; const unsigned typeId = descAttr.m_typeId; // full data size const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc); - ndbrequire(size1 != 0 && size1 == data2.ah().getDataSize()); + ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize()); const unsigned size2 = min(size1, len2); len2 -= size2; // compare - const Uint32* const p1 = *data1; - const Uint32* const p2 = &data2[AttributeHeaderSize]; + const Uint32* const p1 = *searchKey; + const Uint32* const p2 = &entryData[AttributeHeaderSize]; ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size2); if (ret != 0) { jam(); @@ -62,20 +62,20 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, ConstDat } } else { jam(); - // not NULL < NULL - ret = -1; + // not NULL > NULL + ret = +1; break; } } else { - if (! data2.ah().isNULL()) { + if (! entryData.ah().isNULL()) { jam(); - // NULL > not NULL - ret = +1; + // NULL < not NULL + ret = -1; break; } } - data1 += 1; - data2 += AttributeHeaderSize + data2.ah().getDataSize(); + searchKey += 1; + entryData += AttributeHeaderSize + entryData.ah().getDataSize(); start++; } // XXX until data format errors are handled @@ -89,17 +89,17 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, ConstDat * Start position is updated as in previous routine. */ int -Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, TableData data2) +Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, TableData entryKey) { const unsigned numAttrs = frag.m_numAttrs; const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); // skip to right position - data1 += start; - data2 += start; + searchKey += start; + entryKey += start; int ret = 0; while (start < numAttrs) { - if (*data1 != 0) { - if (*data2 != 0) { + if (*searchKey != 0) { + if (*entryKey != 0) { jam(); // current attribute const DescAttr& descAttr = descEnt.m_descAttr[start]; @@ -107,8 +107,8 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, TableDat // full data size const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc); // compare - const Uint32* const p1 = *data1; - const Uint32* const p2 = *data2; + const Uint32* const p1 = *searchKey; + const Uint32* const p2 = *entryKey; ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size1); if (ret != 0) { jam(); @@ -116,20 +116,20 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, TableDat } } else { jam(); - // not NULL < NULL - ret = -1; + // not NULL > NULL + ret = +1; break; } } else { - if (*data2 != 0) { + if (*entryKey != 0) { jam(); - // NULL > not NULL - ret = +1; + // NULL < not NULL + ret = -1; break; } } - data1 += 1; - data2 += 1; + searchKey += 1; + entryKey += 1; start++; } // XXX until data format errors are handled @@ -137,94 +137,96 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData data1, TableDat return ret; } - /* - * Scan bound vs tree entry. + * Scan bound vs node prefix. * * Compare lower or upper bound and index attribute data. The attribute * data may be partial in which case CmpUnknown may be returned. - * Returns -1 if the boundary is to the left of the compared key and +1 if - * the boundary is to the right of the compared key. + * Returns -1 if the boundary is to the left of the compared key and +1 + * if the boundary is to the right of the compared key. * - * To get this behaviour we treat equality a little bit special. - * If the boundary is a lower bound then the boundary is to the left of all - * equal keys and if it is an upper bound then the boundary is to the right - * of all equal keys. + * To get this behaviour we treat equality a little bit special. If the + * boundary is a lower bound then the boundary is to the left of all + * equal keys and if it is an upper bound then the boundary is to the + * right of all equal keys. * * When searching for the first key we are using the lower bound to try - * to find the first key that is to the right of the boundary. - * Then we start scanning from this tuple (including the tuple itself) - * until we find the first key which is to the right of the boundary. Then - * we stop and do not include that key in the scan result. + * to find the first key that is to the right of the boundary. Then we + * start scanning from this tuple (including the tuple itself) until we + * find the first key which is to the right of the boundary. Then we + * stop and do not include that key in the scan result. */ int -Dbtux::cmpScanBound(const Frag& frag, const BoundPar boundPar) +Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigned boundCount, ConstData entryData, unsigned maxlen) { - unsigned type = 4; - int ret = 0; - /* - No boundary means full scan, low boundary is to the right of all keys. - Thus we should always return -1. For upper bound we are to the right of - all keys, thus we should always return +1. We achieve this behaviour - by initialising return value to 0 and set type to 4. - */ const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); - ConstData data1 = boundPar.m_data1; - ConstData data2 = boundPar.m_data2; // direction 0-lower 1-upper - const unsigned dir = boundPar.m_dir; ndbrequire(dir <= 1); // number of words of data left - unsigned len2 = boundPar.m_len2; - for (unsigned i = 0; i < boundPar.m_count1; i++) { + unsigned len2 = maxlen; + /* + * No boundary means full scan, low boundary is to the right of all + * keys. Thus we should always return -1. For upper bound we are to + * the right of all keys, thus we should always return +1. We achieve + * this behaviour by initializing type to 4. + */ + unsigned type = 4; + while (boundCount != 0) { if (len2 < AttributeHeaderSize) { jam(); return NdbSqlUtil::CmpUnknown; } len2 -= AttributeHeaderSize; // get and skip bound type - type = data1[0]; - data1 += 1; - ndbrequire(! data1.ah().isNULL()); - if (! data2.ah().isNULL()) { - jam(); - // current attribute - const unsigned index = data1.ah().getAttributeId(); - const DescAttr& descAttr = descEnt.m_descAttr[index]; - const unsigned typeId = descAttr.m_typeId; - ndbrequire(data2.ah().getAttributeId() == descAttr.m_primaryAttrId); - // full data size - const unsigned size1 = data1.ah().getDataSize(); - ndbrequire(size1 != 0 && size1 == data2.ah().getDataSize()); - const unsigned size2 = min(size1, len2); - len2 -= size2; - // compare - const Uint32* const p1 = &data1[AttributeHeaderSize]; - const Uint32* const p2 = &data2[AttributeHeaderSize]; - ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size2); - if (ret != 0) { + type = boundInfo[0]; + boundInfo += 1; + if (! boundInfo.ah().isNULL()) { + if (! entryData.ah().isNULL()) { jam(); - return ret; + // current attribute + const unsigned index = boundInfo.ah().getAttributeId(); + const DescAttr& descAttr = descEnt.m_descAttr[index]; + const unsigned typeId = descAttr.m_typeId; + ndbrequire(entryData.ah().getAttributeId() == descAttr.m_primaryAttrId); + // full data size + const unsigned size1 = boundInfo.ah().getDataSize(); + ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize()); + const unsigned size2 = min(size1, len2); + len2 -= size2; + // compare + const Uint32* const p1 = &boundInfo[AttributeHeaderSize]; + const Uint32* const p2 = &entryData[AttributeHeaderSize]; + int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size2); + // XXX until data format errors are handled + ndbrequire(ret != NdbSqlUtil::CmpError); + if (ret != 0) { + jam(); + return ret; + } + } else { + jam(); + // not NULL > NULL + return +1; } } else { jam(); - /* - NULL is bigger than any bound, thus the boundary is always to the - left of NULL - */ - return -1; + if (! entryData.ah().isNULL()) { + jam(); + // NULL < not NULL + return -1; + } } - data1 += AttributeHeaderSize + data1.ah().getDataSize(); - data2 += AttributeHeaderSize + data2.ah().getDataSize(); + boundInfo += AttributeHeaderSize + boundInfo.ah().getDataSize(); + entryData += AttributeHeaderSize + entryData.ah().getDataSize(); + boundCount -= 1; } - ndbassert(ret == 0); if (dir == 0) { jam(); /* - Looking for the lower bound. If strict lower bound then the boundary is - to the right of the compared key and otherwise (equal included in range) - then the boundary is to the left of the key. - */ + * Looking for the lower bound. If strict lower bound then the + * boundary is to the right of the compared key and otherwise (equal + * included in range) then the boundary is to the left of the key. + */ if (type == 1) { jam(); return +1; @@ -233,10 +235,11 @@ Dbtux::cmpScanBound(const Frag& frag, const BoundPar boundPar) } else { jam(); /* - Looking for the upper bound. If strict upper bound then the boundary is - to the left of all equal keys and otherwise (equal included in the - range) then the boundary is to the right of all equal keys. - */ + * Looking for the upper bound. If strict upper bound then the + * boundary is to the left of all equal keys and otherwise (equal + * included in the range) then the boundary is to the right of all + * equal keys. + */ if (type == 3) { jam(); return -1; @@ -245,3 +248,72 @@ Dbtux::cmpScanBound(const Frag& frag, const BoundPar boundPar) } } +/* + * Scan bound vs tree entry. + */ +int +Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigned boundCount, TableData entryKey) +{ + const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); + // direction 0-lower 1-upper + ndbrequire(dir <= 1); + // initialize type to equality + unsigned type = 4; + while (boundCount != 0) { + // get and skip bound type + type = boundInfo[0]; + boundInfo += 1; + if (! boundInfo.ah().isNULL()) { + if (*entryKey != 0) { + jam(); + // current attribute + const unsigned index = boundInfo.ah().getAttributeId(); + const DescAttr& descAttr = descEnt.m_descAttr[index]; + const unsigned typeId = descAttr.m_typeId; + // full data size + const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc); + // compare + const Uint32* const p1 = &boundInfo[AttributeHeaderSize]; + const Uint32* const p2 = *entryKey; + int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size1); + // XXX until data format errors are handled + ndbrequire(ret != NdbSqlUtil::CmpError); + if (ret != 0) { + jam(); + return ret; + } + } else { + jam(); + // not NULL > NULL + return +1; + } + } else { + jam(); + if (*entryKey != 0) { + jam(); + // NULL < not NULL + return -1; + } + } + boundInfo += AttributeHeaderSize + boundInfo.ah().getDataSize(); + entryKey += 1; + boundCount -= 1; + } + if (dir == 0) { + // lower bound + jam(); + if (type == 1) { + jam(); + return +1; + } + return -1; + } else { + // upper bound + jam(); + if (type == 3) { + jam(); + return -1; + } + return +1; + } +} diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp index c4931685305..11f4f12b7f6 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp @@ -137,16 +137,17 @@ Dbtux::printNode(Signal* signal, Frag& frag, NdbOut& out, TupLoc loc, PrintPar& par.m_ok = false; } } + static const char* const sep = " *** "; // check child-parent links if (node.getLink(2) != par.m_parent) { par.m_ok = false; - out << par.m_path << " *** "; + out << par.m_path << sep; out << "parent loc " << hex << node.getLink(2); out << " should be " << hex << par.m_parent << endl; } if (node.getSide() != par.m_side) { par.m_ok = false; - out << par.m_path << " *** "; + out << par.m_path << sep; out << "side " << dec << node.getSide(); out << " should be " << dec << par.m_side << endl; } @@ -154,26 +155,26 @@ Dbtux::printNode(Signal* signal, Frag& frag, NdbOut& out, TupLoc loc, PrintPar& const int balance = -cpar[0].m_depth + cpar[1].m_depth; if (node.getBalance() != balance) { par.m_ok = false; - out << par.m_path << " *** "; + out << par.m_path << sep; out << "balance " << node.getBalance(); out << " should be " << balance << endl; } if (abs(node.getBalance()) > 1) { par.m_ok = false; - out << par.m_path << " *** "; + out << par.m_path << sep; out << "balance " << node.getBalance() << " is invalid" << endl; } // check occupancy - if (node.getOccup() > tree.m_maxOccup) { + if (node.getOccup() == 0 || node.getOccup() > tree.m_maxOccup) { par.m_ok = false; - out << par.m_path << " *** "; + out << par.m_path << sep; out << "occupancy " << node.getOccup(); - out << " greater than max " << tree.m_maxOccup << endl; + out << " zero or greater than max " << tree.m_maxOccup << endl; } // check for occupancy of interior node if (node.getChilds() == 2 && node.getOccup() < tree.m_minOccup) { par.m_ok = false; - out << par.m_path << " *** "; + out << par.m_path << sep; out << "occupancy " << node.getOccup() << " of interior node"; out << " less than min " << tree.m_minOccup << endl; } @@ -183,13 +184,74 @@ Dbtux::printNode(Signal* signal, Frag& frag, NdbOut& out, TupLoc loc, PrintPar& node.getLink(1 - i) == NullTupLoc && node.getOccup() + cpar[i].m_occup <= tree.m_maxOccup) { par.m_ok = false; - out << par.m_path << " *** "; + out << par.m_path << sep; out << "missed merge with child " << i << endl; } } + // check inline prefix + { ConstData data1 = node.getPref(); + Uint32 data2[MaxPrefSize]; + memset(data2, DataFillByte, MaxPrefSize << 2); + readKeyAttrs(frag, node.getMinMax(0), 0, c_searchKey); + copyAttrs(frag, c_searchKey, data2, tree.m_prefSize); + for (unsigned n = 0; n < tree.m_prefSize; n++) { + if (data1[n] != data2[n]) { + par.m_ok = false; + out << par.m_path << sep; + out << "inline prefix mismatch word " << n; + out << " value " << hex << data1[n]; + out << " should be " << hex << data2[n] << endl; + break; + } + } + } + // check ordering within node + for (unsigned j = 1; j < node.getOccup(); j++) { + unsigned start = 0; + const TreeEnt ent1 = node.getEnt(j - 1); + const TreeEnt ent2 = node.getEnt(j); + if (j == 1) { + readKeyAttrs(frag, ent1, start, c_searchKey); + } else { + memcpy(c_searchKey, c_entryKey, frag.m_numAttrs << 2); + } + readKeyAttrs(frag, ent2, start, c_entryKey); + int ret = cmpSearchKey(frag, start, c_searchKey, c_entryKey); + if (ret == 0) + ret = ent1.cmp(ent2); + if (ret != -1) { + par.m_ok = false; + out << par.m_path << sep; + out << " disorder within node at pos " << j << endl; + } + } + // check ordering wrt subtrees + for (unsigned i = 0; i <= 1; i++) { + if (node.getLink(i) == NullTupLoc) + continue; + const TreeEnt ent1 = cpar[i].m_minmax[1 - i]; + const TreeEnt ent2 = node.getMinMax(i); + unsigned start = 0; + readKeyAttrs(frag, ent1, start, c_searchKey); + readKeyAttrs(frag, ent2, start, c_entryKey); + int ret = cmpSearchKey(frag, start, c_searchKey, c_entryKey); + if (ret == 0) + ret = ent1.cmp(ent2); + if (ret != (i == 0 ? -1 : +1)) { + par.m_ok = false; + out << par.m_path << sep; + out << " disorder wrt subtree " << i << endl; + } + } // return values par.m_depth = 1 + max(cpar[0].m_depth, cpar[1].m_depth); par.m_occup = node.getOccup(); + for (unsigned i = 0; i <= 1; i++) { + if (node.getLink(i) == NullTupLoc) + par.m_minmax[i] = node.getMinMax(i); + else + par.m_minmax[i] = cpar[i].m_minmax[i]; + } } NdbOut& @@ -355,20 +417,19 @@ operator<<(NdbOut& out, const Dbtux::NodeHandle& node) out << " [acc " << dec << node.m_acc << "]"; out << " [node " << *node.m_node << "]"; if (node.m_acc >= Dbtux::AccPref) { - for (unsigned i = 0; i <= 1; i++) { - out << " [pref " << dec << i; - const Uint32* data = (const Uint32*)node.m_node + Dbtux::NodeHeadSize + i * tree.m_prefSize; - for (unsigned j = 0; j < node.m_frag.m_tree.m_prefSize; j++) - out << " " << hex << data[j]; - out << "]"; - } + const Uint32* data; + out << " [pref"; + data = (const Uint32*)node.m_node + Dbtux::NodeHeadSize; + for (unsigned j = 0; j < tree.m_prefSize; j++) + out << " " << hex << data[j]; + out << "]"; out << " [entList"; unsigned numpos = node.m_node->m_occup; if (node.m_acc < Dbtux::AccFull && numpos > 2) { numpos = 2; out << "(" << dec << numpos << ")"; } - const Uint32* data = (const Uint32*)node.m_node + Dbtux::NodeHeadSize + 2 * tree.m_prefSize; + data = (const Uint32*)node.m_node + Dbtux::NodeHeadSize + tree.m_prefSize; const Dbtux::TreeEnt* entList = (const Dbtux::TreeEnt*)data; for (unsigned pos = 0; pos < numpos; pos++) out << " " << entList[pos]; diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp index 93a5c78338c..22b2ce69838 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp @@ -26,8 +26,13 @@ Dbtux::Dbtux(const Configuration& conf) : #ifdef VM_TRACE debugFile(0), debugOut(*new NullOutputStream()), + // until ndb_mgm supports dump +#ifdef DBTUX_DEBUG_TREE + debugFlags(DebugTree), +#else debugFlags(0), #endif +#endif c_internalStartPhase(0), c_typeOfStart(NodeState::ST_ILLEGAL_TYPE), c_dataBuffer(0) @@ -241,37 +246,14 @@ Dbtux::readKeyAttrs(const Frag& frag, TreeEnt ent, unsigned start, TableData key } void -Dbtux::copyAttrs(Data dst, ConstData src, CopyPar& copyPar) +Dbtux::readTablePk(const Frag& frag, TreeEnt ent, unsigned& pkSize, Data pkData) { - CopyPar c = copyPar; - c.m_numitems = 0; - c.m_numwords = 0; - while (c.m_numitems < c.m_items) { - jam(); - if (c.m_headers) { - unsigned i = 0; - while (i < AttributeHeaderSize) { - if (c.m_numwords >= c.m_maxwords) { - copyPar = c; - return; - } - dst[c.m_numwords++] = src[i++]; - } - } - unsigned size = src.ah().getDataSize(); - src += AttributeHeaderSize; - unsigned i = 0; - while (i < size) { - if (c.m_numwords >= c.m_maxwords) { - copyPar = c; - return; - } - dst[c.m_numwords++] = src[i++]; - } - src += size; - c.m_numitems++; - } - copyPar = c; + const Uint32 tableFragPtrI = frag.m_tupTableFragPtrI[ent.m_fragBit]; + const TupLoc tupLoc = ent.m_tupLoc; + Uint32 size = 0; + c_tup->tuxReadKeys(tableFragPtrI, tupLoc.m_pageId, tupLoc.m_pageOffset, &size, pkData); + ndbrequire(size != 0); + pkSize = size; } /* @@ -314,6 +296,9 @@ Dbtux::copyAttrs(const Frag& frag, TableData data1, Data data2, unsigned maxlen2 keyAttrs += 1; data1 += 1; } +#ifdef VM_TRACE + memset(data2, DataFillByte, len2 << 2); +#endif } BLOCK_FUNCTIONS(Dbtux); diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp index fc72611a273..24b030bf8ec 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxMaint.cpp @@ -82,8 +82,8 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal) ent.m_fragBit = fragBit; // read search key readKeyAttrs(frag, ent, 0, c_searchKey); - // check if all keys are null - { + if (! frag.m_storeNullKey) { + // check if all keys are null const unsigned numAttrs = frag.m_numAttrs; bool allNull = true; for (unsigned i = 0; i < numAttrs; i++) { @@ -111,19 +111,18 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal) debugOut << endl; } #endif - // find position in tree - TreePos treePos; - treeSearch(signal, frag, c_searchKey, ent, treePos); -#ifdef VM_TRACE - if (debugFlags & DebugMaint) { - debugOut << treePos << endl; - } -#endif // do the operation req->errorCode = 0; + TreePos treePos; switch (opCode) { case TuxMaintReq::OpAdd: jam(); + searchToAdd(signal, frag, c_searchKey, ent, treePos); +#ifdef VM_TRACE + if (debugFlags & DebugMaint) { + debugOut << treePos << endl; + } +#endif if (treePos.m_match) { jam(); // there is no "Building" state so this will have to do @@ -152,6 +151,12 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal) break; case TuxMaintReq::OpRemove: jam(); + searchToRemove(signal, frag, c_searchKey, ent, treePos); +#ifdef VM_TRACE + if (debugFlags & DebugMaint) { + debugOut << treePos << endl; + } +#endif if (! treePos.m_match) { jam(); // there is no "Building" state so this will have to do @@ -167,7 +172,6 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal) ndbrequire(false); break; } - // commit and release nodes #ifdef VM_TRACE if (debugFlags & DebugTree) { printTree(signal, frag, debugOut); @@ -176,89 +180,3 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal) // copy back *sig = *req; } - -/* - * Read index key attributes from TUP. If buffer is provided the data - * is copied to it. Otherwise pointer is set to signal data. - */ -void -Dbtux::tupReadAttrs(Signal* signal, const Frag& frag, ReadPar& readPar) -{ - // define the direct signal - const TreeEnt ent = readPar.m_ent; - TupReadAttrs* const req = (TupReadAttrs*)signal->getDataPtrSend(); - req->errorCode = RNIL; - req->requestInfo = 0; - req->tableId = frag.m_tableId; - req->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff); - req->fragPtrI = frag.m_tupTableFragPtrI[ent.m_fragBit]; - req->tupAddr = (Uint32)-1; - req->tupVersion = ent.m_tupVersion; - req->pageId = ent.m_tupLoc.m_pageId; - req->pageOffset = ent.m_tupLoc.m_pageOffset; - req->bufferId = 0; - // add count and list of attribute ids - Data data = (Uint32*)req + TupReadAttrs::SignalLength; - data[0] = readPar.m_count; - data += 1; - const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff); - for (Uint32 i = 0; i < readPar.m_count; i++) { - jam(); - const DescAttr& descAttr = descEnt.m_descAttr[readPar.m_first + i]; - data.ah() = AttributeHeader(descAttr.m_primaryAttrId, 0); - data += 1; - } - // execute - EXECUTE_DIRECT(DBTUP, GSN_TUP_READ_ATTRS, signal, TupReadAttrs::SignalLength); - jamEntry(); - ndbrequire(req->errorCode == 0); - // data is at output - if (readPar.m_data == 0) { - readPar.m_data = data; - } else { - jam(); - CopyPar copyPar; - copyPar.m_items = readPar.m_count; - copyPar.m_headers = true; - copyAttrs(readPar.m_data, data, copyPar); - } -} - -/* - * Read primary keys. Copy the data without attribute headers into the - * given buffer. Number of words is returned in ReadPar argument. - */ -void -Dbtux::tupReadKeys(Signal* signal, const Frag& frag, ReadPar& readPar) -{ - // define the direct signal - const TreeEnt ent = readPar.m_ent; - TupReadAttrs* const req = (TupReadAttrs*)signal->getDataPtrSend(); - req->errorCode = RNIL; - req->requestInfo = TupReadAttrs::ReadKeys; - req->tableId = frag.m_tableId; - req->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff); - req->fragPtrI = frag.m_tupTableFragPtrI[ent.m_fragBit]; - req->tupAddr = (Uint32)-1; - req->tupVersion = RNIL; // not used - req->pageId = ent.m_tupLoc.m_pageId; - req->pageOffset = ent.m_tupLoc.m_pageOffset; - req->bufferId = 0; - // execute - EXECUTE_DIRECT(DBTUP, GSN_TUP_READ_ATTRS, signal, TupReadAttrs::SignalLength); - jamEntry(); - ndbrequire(req->errorCode == 0); - // copy out in special format - ConstData data = (Uint32*)req + TupReadAttrs::SignalLength; - const Uint32 numKeys = data[0]; - data += 1 + numKeys; - // copy out without headers - ndbrequire(readPar.m_data != 0); - CopyPar copyPar; - copyPar.m_items = numKeys; - copyPar.m_headers = false; - copyAttrs(readPar.m_data, data, copyPar); - // return counts - readPar.m_count = numKeys; - readPar.m_size = copyPar.m_numwords; -} diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp index ca6a3e69931..b30b555ccad 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxMeta.cpp @@ -85,6 +85,7 @@ Dbtux::execTUXFRAGREQ(Signal* signal) fragPtr.p->m_fragOff = req->fragOff; fragPtr.p->m_fragId = req->fragId; fragPtr.p->m_numAttrs = req->noOfAttr; + fragPtr.p->m_storeNullKey = true; // not yet configurable fragPtr.p->m_tupIndexFragPtrI = req->tupIndexFragPtrI; fragPtr.p->m_tupTableFragPtrI[0] = req->tupTableFragPtrI[0]; fragPtr.p->m_tupTableFragPtrI[1] = req->tupTableFragPtrI[1]; @@ -111,6 +112,7 @@ Dbtux::execTUXFRAGREQ(Signal* signal) indexPtr.p->m_tableId = req->primaryTableId; indexPtr.p->m_fragOff = req->fragOff; indexPtr.p->m_numAttrs = req->noOfAttr; + indexPtr.p->m_storeNullKey = true; // not yet configurable // allocate attribute descriptors if (! allocDescEnt(indexPtr)) { jam(); diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp index c969e35dc82..a1bfa2179bb 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp @@ -85,10 +85,9 @@ Dbtux::insertNode(Signal* signal, NodeHandle& node, AccSize acc) new (node.m_node) TreeNode(); #ifdef VM_TRACE TreeHead& tree = frag.m_tree; - memset(node.getPref(0), 0xa2, tree.m_prefSize << 2); - memset(node.getPref(1), 0xa2, tree.m_prefSize << 2); + memset(node.getPref(), DataFillByte, tree.m_prefSize << 2); TreeEnt* entList = tree.getEntList(node.m_node); - memset(entList, 0xa4, (tree.m_maxOccup + 1) * (TreeEntSize << 2)); + memset(entList, NodeFillByte, (tree.m_maxOccup + 1) * (TreeEntSize << 2)); #endif } @@ -116,12 +115,12 @@ Dbtux::deleteNode(Signal* signal, NodeHandle& node) * attribute headers for now. XXX use null mask instead */ void -Dbtux::setNodePref(Signal* signal, NodeHandle& node, unsigned i) +Dbtux::setNodePref(Signal* signal, NodeHandle& node) { const Frag& frag = node.m_frag; const TreeHead& tree = frag.m_tree; - readKeyAttrs(frag, node.getMinMax(i), 0, c_entryKey); - copyAttrs(frag, c_entryKey, node.getPref(i), tree.m_prefSize); + readKeyAttrs(frag, node.getMinMax(0), 0, c_entryKey); + copyAttrs(frag, c_entryKey, node.getPref(), tree.m_prefSize); } // node operations @@ -173,11 +172,9 @@ Dbtux::nodePushUp(Signal* signal, NodeHandle& node, unsigned pos, const TreeEnt& tmpList[pos] = ent; entList[0] = entList[occup + 1]; node.setOccup(occup + 1); - // fix prefixes + // fix prefix if (occup == 0 || pos == 0) - setNodePref(signal, node, 0); - if (occup == 0 || pos == occup) - setNodePref(signal, node, 1); + setNodePref(signal, node); } /* @@ -248,11 +245,9 @@ Dbtux::nodePopDown(Signal* signal, NodeHandle& node, unsigned pos, TreeEnt& ent) } entList[0] = entList[occup - 1]; node.setOccup(occup - 1); - // fix prefixes + // fix prefix if (occup != 1 && pos == 0) - setNodePref(signal, node, 0); - if (occup != 1 && pos == occup - 1) - setNodePref(signal, node, 1); + setNodePref(signal, node); } /* @@ -325,11 +320,9 @@ Dbtux::nodePushDown(Signal* signal, NodeHandle& node, unsigned pos, TreeEnt& ent tmpList[pos] = ent; ent = oldMin; entList[0] = entList[occup]; - // fix prefixes + // fix prefix if (true) - setNodePref(signal, node, 0); - if (occup == 1 || pos == occup - 1) - setNodePref(signal, node, 1); + setNodePref(signal, node); } /* @@ -403,11 +396,9 @@ Dbtux::nodePopUp(Signal* signal, NodeHandle& node, unsigned pos, TreeEnt& ent) } tmpList[0] = newMin; entList[0] = entList[occup]; - // fix prefixes + // fix prefix if (true) - setNodePref(signal, node, 0); - if (occup == 1 || pos == occup - 1) - setNodePref(signal, node, 1); + setNodePref(signal, node); } /* diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp index 703b0abb683..c4c33ff931f 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxScan.cpp @@ -137,7 +137,7 @@ Dbtux::execTUX_BOUND_INFO(Signal* signal) const Uint32* const data = (Uint32*)sig + TuxBoundInfo::SignalLength; unsigned offset = 5; // walk through entries - while (offset + 2 < req->boundAiLength) { + while (offset + 2 <= req->boundAiLength) { jam(); const unsigned type = data[offset]; if (type > 4) { @@ -379,8 +379,8 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) scanNext(signal, scanPtr); } // for reading tuple key in Current or Locked state - ReadPar keyPar; - keyPar.m_data = 0; // indicates not yet done + Data pkData = c_dataBuffer; + unsigned pkSize = 0; // indicates not yet done if (scan.m_state == ScanOp::Current) { // found an entry to return jam(); @@ -389,9 +389,7 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) jam(); const TreeEnt ent = scan.m_scanPos.m_ent; // read tuple key - keyPar.m_ent = ent; - keyPar.m_data = c_dataBuffer; - tupReadKeys(signal, frag, keyPar); + readTablePk(frag, ent, pkSize, pkData); // get read lock or exclusive lock AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend(); lockReq->returnCode = RNIL; @@ -403,9 +401,9 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) lockReq->tableId = scan.m_tableId; lockReq->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff); lockReq->fragPtrI = frag.m_accTableFragPtrI[ent.m_fragBit]; - const Uint32* const buf32 = static_cast<Uint32*>(keyPar.m_data); + const Uint32* const buf32 = static_cast<Uint32*>(pkData); const Uint64* const buf64 = reinterpret_cast<const Uint64*>(buf32); - lockReq->hashValue = md5_hash(buf64, keyPar.m_size); + lockReq->hashValue = md5_hash(buf64, pkSize); lockReq->tupAddr = getTupAddr(frag, ent); lockReq->transId1 = scan.m_transId1; lockReq->transId2 = scan.m_transId2; @@ -480,11 +478,9 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) const TreeEnt ent = scan.m_scanPos.m_ent; if (scan.m_keyInfo) { jam(); - if (keyPar.m_data == 0) { + if (pkSize == 0) { jam(); - keyPar.m_ent = ent; - keyPar.m_data = c_dataBuffer; - tupReadKeys(signal, frag, keyPar); + readTablePk(frag, ent, pkSize, pkData); } } // conf signal @@ -510,10 +506,10 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) // add key info if (scan.m_keyInfo) { jam(); - conf->keyLength = keyPar.m_size; + conf->keyLength = pkSize; // piggy-back first 4 words of key data for (unsigned i = 0; i < 4; i++) { - conf->key[i] = i < keyPar.m_size ? keyPar.m_data[i] : 0; + conf->key[i] = i < pkSize ? pkData[i] : 0; } signalLength = 11; } @@ -525,18 +521,18 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal) EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, signalLength); } // send rest of key data - if (scan.m_keyInfo && keyPar.m_size > 4) { + if (scan.m_keyInfo && pkSize > 4) { unsigned total = 4; - while (total < keyPar.m_size) { + while (total < pkSize) { jam(); - unsigned length = keyPar.m_size - total; + unsigned length = pkSize - total; if (length > 20) length = 20; signal->theData[0] = scan.m_userPtr; signal->theData[1] = 0; signal->theData[2] = 0; signal->theData[3] = length; - memcpy(&signal->theData[4], &keyPar.m_data[total], length << 2); + memcpy(&signal->theData[4], &pkData[total], length << 2); sendSignal(scan.m_userRef, GSN_ACC_SCAN_INFO24, signal, 4 + length, JBB); total += length; @@ -606,6 +602,8 @@ Dbtux::execACCKEYCONF(Signal* signal) // LQH has the ball return; } + // lose the lock + scan.m_accLockOp = RNIL; // continue at ACC_ABORTCONF } @@ -648,6 +646,8 @@ Dbtux::execACCKEYREF(Signal* signal) // LQH has the ball return; } + // lose the lock + scan.m_accLockOp = RNIL; // continue at ACC_ABORTCONF } @@ -689,16 +689,9 @@ Dbtux::scanFirst(Signal* signal, ScanOpPtr scanPtr) ScanOp& scan = *scanPtr.p; Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI); TreeHead& tree = frag.m_tree; - if (tree.m_root == NullTupLoc) { - // tree may have become empty - jam(); - scan.m_state = ScanOp::Last; - return; - } - TreePos pos; - pos.m_loc = tree.m_root; - NodeHandle node(frag); - // unpack lower bound + // set up index keys for this operation + setKeyAttrs(frag); + // unpack lower bound into c_dataBuffer const ScanBound& bound = *scan.m_bound[0]; ScanBoundIterator iter; bound.first(iter); @@ -707,103 +700,22 @@ Dbtux::scanFirst(Signal* signal, ScanOpPtr scanPtr) c_dataBuffer[j] = *iter.data; bound.next(iter); } - // comparison parameters - BoundPar boundPar; - boundPar.m_data1 = c_dataBuffer; - boundPar.m_count1 = scan.m_boundCnt[0]; - boundPar.m_dir = 0; -loop: { + // search for scan start position + TreePos treePos; + searchToScan(signal, frag, c_dataBuffer, scan.m_boundCnt[0], treePos); + if (treePos.m_loc == NullTupLoc) { + // empty tree jam(); - selectNode(signal, node, pos.m_loc, AccPref); - const unsigned occup = node.getOccup(); - ndbrequire(occup != 0); - for (unsigned i = 0; i <= 1; i++) { - jam(); - // compare prefix - boundPar.m_data2 = node.getPref(i); - boundPar.m_len2 = tree.m_prefSize; - int ret = cmpScanBound(frag, boundPar); - if (ret == NdbSqlUtil::CmpUnknown) { - jam(); - // read full value - ReadPar readPar; - readPar.m_ent = node.getMinMax(i); - readPar.m_first = 0; - readPar.m_count = frag.m_numAttrs; - readPar.m_data = 0; // leave in signal data - tupReadAttrs(signal, frag, readPar); - // compare full value - boundPar.m_data2 = readPar.m_data; - boundPar.m_len2 = ZNIL; // big - ret = cmpScanBound(frag, boundPar); - ndbrequire(ret != NdbSqlUtil::CmpUnknown); - } - if (i == 0 && ret < 0) { - jam(); - const TupLoc loc = node.getLink(i); - if (loc != NullTupLoc) { - jam(); - // continue to left subtree - pos.m_loc = loc; - goto loop; - } - // start scanning this node - pos.m_pos = 0; - pos.m_match = false; - pos.m_dir = 3; - scan.m_scanPos = pos; - scan.m_state = ScanOp::Next; - linkScan(node, scanPtr); - return; - } - if (i == 1 && ret > 0) { - jam(); - const TupLoc loc = node.getLink(i); - if (loc != NullTupLoc) { - jam(); - // continue to right subtree - pos.m_loc = loc; - goto loop; - } - // start scanning upwards - pos.m_dir = 1; - scan.m_scanPos = pos; - scan.m_state = ScanOp::Next; - linkScan(node, scanPtr); - return; - } - } - // read rest of current node - accessNode(signal, node, AccFull); - // look for first entry - ndbrequire(occup >= 2); - for (unsigned j = 1; j < occup; j++) { - jam(); - ReadPar readPar; - readPar.m_ent = node.getEnt(j); - readPar.m_first = 0; - readPar.m_count = frag.m_numAttrs; - readPar.m_data = 0; // leave in signal data - tupReadAttrs(signal, frag, readPar); - // compare - boundPar.m_data2 = readPar.m_data; - boundPar.m_len2 = ZNIL; // big - int ret = cmpScanBound(frag, boundPar); - ndbrequire(ret != NdbSqlUtil::CmpUnknown); - if (ret < 0) { - jam(); - // start scanning this node - pos.m_pos = j; - pos.m_match = false; - pos.m_dir = 3; - scan.m_scanPos = pos; - scan.m_state = ScanOp::Next; - linkScan(node, scanPtr); - return; - } - } - ndbrequire(false); + scan.m_state = ScanOp::Last; + return; } + // set position and state + scan.m_scanPos = treePos; + scan.m_state = ScanOp::Next; + // link the scan to node found + NodeHandle node(frag); + selectNode(signal, node, treePos.m_loc, AccFull); + linkScan(node, scanPtr); } /* @@ -830,7 +742,9 @@ Dbtux::scanNext(Signal* signal, ScanOpPtr scanPtr) if (scan.m_state == ScanOp::Locked) { jam(); // version of a tuple locked by us cannot disappear (assert only) +#ifdef dbtux_wl_1942_is_done ndbassert(false); +#endif AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend(); lockReq->returnCode = RNIL; lockReq->requestInfo = AccLockReq::Unlock; @@ -841,7 +755,9 @@ Dbtux::scanNext(Signal* signal, ScanOpPtr scanPtr) scan.m_accLockOp = RNIL; scan.m_state = ScanOp::Current; } - // unpack upper bound + // set up index keys for this operation + setKeyAttrs(frag); + // unpack upper bound into c_dataBuffer const ScanBound& bound = *scan.m_bound[1]; ScanBoundIterator iter; bound.first(iter); @@ -850,11 +766,6 @@ Dbtux::scanNext(Signal* signal, ScanOpPtr scanPtr) c_dataBuffer[j] = *iter.data; bound.next(iter); } - // comparison parameters - BoundPar boundPar; - boundPar.m_data1 = c_dataBuffer; - boundPar.m_count1 = scan.m_boundCnt[1]; - boundPar.m_dir = 1; // use copy of position TreePos pos = scan.m_scanPos; // get and remember original node @@ -912,17 +823,9 @@ Dbtux::scanNext(Signal* signal, ScanOpPtr scanPtr) jam(); pos.m_ent = node.getEnt(pos.m_pos); pos.m_dir = 3; // unchanged - // XXX implement prefix optimization - ReadPar readPar; - readPar.m_ent = pos.m_ent; - readPar.m_first = 0; - readPar.m_count = frag.m_numAttrs; - readPar.m_data = 0; // leave in signal data - tupReadAttrs(signal, frag, readPar); - // compare - boundPar.m_data2 = readPar.m_data; - boundPar.m_len2 = ZNIL; // big - int ret = cmpScanBound(frag, boundPar); + // read and compare all attributes + readKeyAttrs(frag, pos.m_ent, 0, c_entryKey); + int ret = cmpScanBound(frag, 1, c_dataBuffer, scan.m_boundCnt[1], c_entryKey); ndbrequire(ret != NdbSqlUtil::CmpUnknown); if (ret < 0) { jam(); @@ -994,35 +897,25 @@ Dbtux::scanNext(Signal* signal, ScanOpPtr scanPtr) bool Dbtux::scanVisible(Signal* signal, ScanOpPtr scanPtr, TreeEnt ent) { - TupQueryTh* const req = (TupQueryTh*)signal->getDataPtrSend(); const ScanOp& scan = *scanPtr.p; const Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI); - /* Assign table, fragment, tuple address + version */ - Uint32 tableId = frag.m_tableId; Uint32 fragBit = ent.m_fragBit; + Uint32 tableFragPtrI = frag.m_tupTableFragPtrI[fragBit]; Uint32 fragId = frag.m_fragId | (fragBit << frag.m_fragOff); Uint32 tupAddr = getTupAddr(frag, ent); Uint32 tupVersion = ent.m_tupVersion; - /* Check for same tuple twice in row */ + // check for same tuple twice in row if (scan.m_lastEnt.m_tupLoc == ent.m_tupLoc && scan.m_lastEnt.m_fragBit == fragBit) { jam(); return false; } - req->tableId = tableId; - req->fragId = fragId; - req->tupAddr = tupAddr; - req->tupVersion = tupVersion; - /* Assign transaction info, trans id + savepoint id */ Uint32 transId1 = scan.m_transId1; Uint32 transId2 = scan.m_transId2; Uint32 savePointId = scan.m_savePointId; - req->transId1 = transId1; - req->transId2 = transId2; - req->savePointId = savePointId; - EXECUTE_DIRECT(DBTUP, GSN_TUP_QUERY_TH, signal, TupQueryTh::SignalLength); + bool ret = c_tup->tuxQueryTh(tableFragPtrI, tupAddr, tupVersion, transId1, transId2, savePointId); jamEntry(); - return (bool)req->returnCode; + return ret; } /* diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxSearch.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxSearch.cpp new file mode 100644 index 00000000000..84048b308bc --- /dev/null +++ b/ndb/src/kernel/blocks/dbtux/DbtuxSearch.cpp @@ -0,0 +1,333 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define DBTUX_SEARCH_CPP +#include "Dbtux.hpp" + +/* + * Search for entry to add. + * + * Similar to searchToRemove (see below). + * + * TODO optimize for initial equal attrs in node min/max + */ +void +Dbtux::searchToAdd(Signal* signal, Frag& frag, TableData searchKey, TreeEnt searchEnt, TreePos& treePos) +{ + const TreeHead& tree = frag.m_tree; + const unsigned numAttrs = frag.m_numAttrs; + NodeHandle currNode(frag); + currNode.m_loc = tree.m_root; + if (currNode.m_loc == NullTupLoc) { + // empty tree + jam(); + treePos.m_match = false; + return; + } + NodeHandle glbNode(frag); // potential g.l.b of final node + /* + * In order to not (yet) change old behaviour, a position between + * 2 nodes returns the one at the bottom of the tree. + */ + NodeHandle bottomNode(frag); + while (true) { + jam(); + selectNode(signal, currNode, currNode.m_loc, AccPref); + int ret; + // compare prefix + unsigned start = 0; + ret = cmpSearchKey(frag, start, searchKey, currNode.getPref(), tree.m_prefSize); + if (ret == NdbSqlUtil::CmpUnknown) { + jam(); + // read and compare remaining attributes + ndbrequire(start < numAttrs); + readKeyAttrs(frag, currNode.getMinMax(0), start, c_entryKey); + ret = cmpSearchKey(frag, start, searchKey, c_entryKey); + ndbrequire(ret != NdbSqlUtil::CmpUnknown); + } + if (ret == 0) { + jam(); + // keys are equal, compare entry values + ret = searchEnt.cmp(currNode.getMinMax(0)); + } + if (ret < 0) { + jam(); + const TupLoc loc = currNode.getLink(0); + if (loc != NullTupLoc) { + jam(); + // continue to left subtree + currNode.m_loc = loc; + continue; + } + if (! glbNode.isNull()) { + jam(); + // move up to the g.l.b but remember the bottom node + bottomNode = currNode; + currNode = glbNode; + } + } else if (ret > 0) { + jam(); + const TupLoc loc = currNode.getLink(1); + if (loc != NullTupLoc) { + jam(); + // save potential g.l.b + glbNode = currNode; + // continue to right subtree + currNode.m_loc = loc; + continue; + } + } else { + jam(); + treePos.m_loc = currNode.m_loc; + treePos.m_pos = 0; + treePos.m_match = true; + return; + } + break; + } + // access rest of current node + accessNode(signal, currNode, AccFull); + for (unsigned j = 0, occup = currNode.getOccup(); j < occup; j++) { + jam(); + int ret; + // read and compare attributes + unsigned start = 0; + readKeyAttrs(frag, currNode.getEnt(j), start, c_entryKey); + ret = cmpSearchKey(frag, start, searchKey, c_entryKey); + ndbrequire(ret != NdbSqlUtil::CmpUnknown); + if (ret == 0) { + jam(); + // keys are equal, compare entry values + ret = searchEnt.cmp(currNode.getEnt(j)); + } + if (ret <= 0) { + jam(); + treePos.m_loc = currNode.m_loc; + treePos.m_pos = j; + treePos.m_match = (ret == 0); + return; + } + } + if (! bottomNode.isNull()) { + jam(); + // backwards compatible for now + treePos.m_loc = bottomNode.m_loc; + treePos.m_pos = 0; + treePos.m_match = false; + return; + } + treePos.m_loc = currNode.m_loc; + treePos.m_pos = currNode.getOccup(); + treePos.m_match = false; +} + +/* + * Search for entry to remove. + * + * Compares search key to each node min. A move to right subtree can + * overshoot target node. The last such node is saved. The final node + * is a half-leaf or leaf. If search key is less than final node min + * then the saved node is the g.l.b of the final node and we move back + * to it. + */ +void +Dbtux::searchToRemove(Signal* signal, Frag& frag, TableData searchKey, TreeEnt searchEnt, TreePos& treePos) +{ + const TreeHead& tree = frag.m_tree; + const unsigned numAttrs = frag.m_numAttrs; + NodeHandle currNode(frag); + currNode.m_loc = tree.m_root; + if (currNode.m_loc == NullTupLoc) { + // empty tree + jam(); + treePos.m_match = false; + return; + } + NodeHandle glbNode(frag); // potential g.l.b of final node + while (true) { + jam(); + selectNode(signal, currNode, currNode.m_loc, AccPref); + int ret; + // compare prefix + unsigned start = 0; + ret = cmpSearchKey(frag, start, searchKey, currNode.getPref(), tree.m_prefSize); + if (ret == NdbSqlUtil::CmpUnknown) { + jam(); + // read and compare remaining attributes + ndbrequire(start < numAttrs); + readKeyAttrs(frag, currNode.getMinMax(0), start, c_entryKey); + ret = cmpSearchKey(frag, start, searchKey, c_entryKey); + ndbrequire(ret != NdbSqlUtil::CmpUnknown); + } + if (ret == 0) { + jam(); + // keys are equal, compare entry values + ret = searchEnt.cmp(currNode.getMinMax(0)); + } + if (ret < 0) { + jam(); + const TupLoc loc = currNode.getLink(0); + if (loc != NullTupLoc) { + jam(); + // continue to left subtree + currNode.m_loc = loc; + continue; + } + if (! glbNode.isNull()) { + jam(); + // move up to the g.l.b + currNode = glbNode; + } + } else if (ret > 0) { + jam(); + const TupLoc loc = currNode.getLink(1); + if (loc != NullTupLoc) { + jam(); + // save potential g.l.b + glbNode = currNode; + // continue to right subtree + currNode.m_loc = loc; + continue; + } + } else { + jam(); + treePos.m_loc = currNode.m_loc; + treePos.m_pos = 0; + treePos.m_match = true; + return; + } + break; + } + // access rest of current node + accessNode(signal, currNode, AccFull); + // pos 0 was handled above + for (unsigned j = 1, occup = currNode.getOccup(); j < occup; j++) { + jam(); + // compare only the entry + if (searchEnt.eq(currNode.getEnt(j))) { + jam(); + treePos.m_loc = currNode.m_loc; + treePos.m_pos = j; + treePos.m_match = true; + return; + } + } + treePos.m_loc = currNode.m_loc; + treePos.m_pos = currNode.getOccup(); + treePos.m_match = false; +} + +/* + * Search for scan start position. + * + * Similar to searchToAdd. + */ +void +Dbtux::searchToScan(Signal* signal, Frag& frag, ConstData boundInfo, unsigned boundCount, TreePos& treePos) +{ + const TreeHead& tree = frag.m_tree; + NodeHandle currNode(frag); + currNode.m_loc = tree.m_root; + if (currNode.m_loc == NullTupLoc) { + // empty tree + jam(); + treePos.m_match = false; + return; + } + NodeHandle glbNode(frag); // potential g.l.b of final node + NodeHandle bottomNode(frag); + while (true) { + jam(); + selectNode(signal, currNode, currNode.m_loc, AccPref); + int ret; + // compare prefix + ret = cmpScanBound(frag, 0, boundInfo, boundCount, currNode.getPref(), tree.m_prefSize); + if (ret == NdbSqlUtil::CmpUnknown) { + jam(); + // read and compare all attributes + readKeyAttrs(frag, currNode.getMinMax(0), 0, c_entryKey); + ret = cmpScanBound(frag, 0, boundInfo, boundCount, c_entryKey); + ndbrequire(ret != NdbSqlUtil::CmpUnknown); + } + if (ret < 0) { + jam(); + const TupLoc loc = currNode.getLink(0); + if (loc != NullTupLoc) { + jam(); + // continue to left subtree + currNode.m_loc = loc; + continue; + } + if (! glbNode.isNull()) { + jam(); + // move up to the g.l.b but remember the bottom node + bottomNode = currNode; + currNode = glbNode; + } else { + // start scanning this node + treePos.m_loc = currNode.m_loc; + treePos.m_pos = 0; + treePos.m_match = false; + treePos.m_dir = 3; + return; + } + } else if (ret > 0) { + jam(); + const TupLoc loc = currNode.getLink(1); + if (loc != NullTupLoc) { + jam(); + // save potential g.l.b + glbNode = currNode; + // continue to right subtree + currNode.m_loc = loc; + continue; + } + } else { + ndbassert(false); + } + break; + } + // access rest of current node + accessNode(signal, currNode, AccFull); + for (unsigned j = 0, occup = currNode.getOccup(); j < occup; j++) { + jam(); + int ret; + // read and compare attributes + readKeyAttrs(frag, currNode.getEnt(j), 0, c_entryKey); + ret = cmpScanBound(frag, 0, boundInfo, boundCount, c_entryKey); + ndbrequire(ret != NdbSqlUtil::CmpUnknown); + if (ret < 0) { + // start scanning from current entry + treePos.m_loc = currNode.m_loc; + treePos.m_pos = j; + treePos.m_match = false; + treePos.m_dir = 3; + return; + } + } + if (! bottomNode.isNull()) { + jam(); + // start scanning the l.u.b + treePos.m_loc = bottomNode.m_loc; + treePos.m_pos = 0; + treePos.m_match = false; + treePos.m_dir = 3; + return; + } + // start scanning upwards (pretend we came from right child) + treePos.m_loc = currNode.m_loc; + treePos.m_dir = 1; +} diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp index 02ed9739f3c..3baa62998db 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxTree.cpp @@ -18,112 +18,6 @@ #include "Dbtux.hpp" /* - * Search for entry. - * - * Search key is index attribute data and tree entry value. Start from - * root node and compare the key to min/max of each node. Use linear - * search on the final (bounding) node. Initial attributes which are - * same in min/max need not be checked. - */ -void -Dbtux::treeSearch(Signal* signal, Frag& frag, TableData searchKey, TreeEnt searchEnt, TreePos& treePos) -{ - const TreeHead& tree = frag.m_tree; - const unsigned numAttrs = frag.m_numAttrs; - treePos.m_loc = tree.m_root; - if (treePos.m_loc == NullTupLoc) { - // empty tree - jam(); - treePos.m_pos = 0; - treePos.m_match = false; - return; - } - NodeHandle node(frag); -loop: { - jam(); - selectNode(signal, node, treePos.m_loc, AccPref); - const unsigned occup = node.getOccup(); - ndbrequire(occup != 0); - // number of equal initial attributes in bounding node - unsigned start = ZNIL; - for (unsigned i = 0; i <= 1; i++) { - jam(); - unsigned start1 = 0; - // compare prefix - int ret = cmpSearchKey(frag, start1, searchKey, node.getPref(i), tree.m_prefSize); - if (ret == NdbSqlUtil::CmpUnknown) { - jam(); - // read and compare remaining attributes - readKeyAttrs(frag, node.getMinMax(i), start1, c_entryKey); - ret = cmpSearchKey(frag, start1, searchKey, c_entryKey); - ndbrequire(ret != NdbSqlUtil::CmpUnknown); - } - if (start > start1) - start = start1; - if (ret == 0) { - jam(); - // keys are equal, compare entry values - ret = searchEnt.cmp(node.getMinMax(i)); - } - if (i == 0 ? (ret < 0) : (ret > 0)) { - jam(); - const TupLoc loc = node.getLink(i); - if (loc != NullTupLoc) { - jam(); - // continue to left/right subtree - treePos.m_loc = loc; - goto loop; - } - // position is immediately before/after this node - treePos.m_pos = (i == 0 ? 0 : occup); - treePos.m_match = false; - return; - } - if (ret == 0) { - jam(); - // position is at first/last entry - treePos.m_pos = (i == 0 ? 0 : occup - 1); - treePos.m_match = true; - return; - } - } - // access rest of the bounding node - accessNode(signal, node, AccFull); - // position is strictly within the node - ndbrequire(occup >= 2); - const unsigned numWithin = occup - 2; - for (unsigned j = 1; j <= numWithin; j++) { - jam(); - int ret = 0; - if (start < numAttrs) { - jam(); - // read and compare remaining attributes - unsigned start1 = start; - readKeyAttrs(frag, node.getEnt(j), start1, c_entryKey); - ret = cmpSearchKey(frag, start1, searchKey, c_entryKey); - ndbrequire(ret != NdbSqlUtil::CmpUnknown); - } - if (ret == 0) { - jam(); - // keys are equal, compare entry values - ret = searchEnt.cmp(node.getEnt(j)); - } - if (ret <= 0) { - jam(); - // position is before or at this entry - treePos.m_pos = j; - treePos.m_match = (ret == 0); - return; - } - } - // position is before last entry - treePos.m_pos = occup - 1; - treePos.m_match = false; - return; - } -} - -/* * Add entry. */ void diff --git a/ndb/src/kernel/blocks/dbtux/Makefile.am b/ndb/src/kernel/blocks/dbtux/Makefile.am index 0b48ad5724f..7d012924522 100644 --- a/ndb/src/kernel/blocks/dbtux/Makefile.am +++ b/ndb/src/kernel/blocks/dbtux/Makefile.am @@ -7,6 +7,7 @@ libdbtux_a_SOURCES = \ DbtuxNode.cpp \ DbtuxTree.cpp \ DbtuxScan.cpp \ + DbtuxSearch.cpp \ DbtuxCmp.cpp \ DbtuxDebug.cpp diff --git a/ndb/src/kernel/blocks/dbtux/Times.txt b/ndb/src/kernel/blocks/dbtux/Times.txt index 16c4102249b..c4744a23c07 100644 --- a/ndb/src/kernel/blocks/dbtux/Times.txt +++ b/ndb/src/kernel/blocks/dbtux/Times.txt @@ -1,17 +1,32 @@ -index maintenance overhead -========================== +ordered index performance +========================= "mc02" 2x1700 MHz linux-2.4.9 gcc-2.96 -O3 one db-node -case a: index on Unsigned -testOIBasic -case u -table 1 -index 1 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging +case a: maintenance: index on Unsigned +testOIBasic -case u -table 1 -index 2 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging -case b: index on Varchar(5) + Varchar(5) + Varchar(20) + Unsigned -testOIBasic -case u -table 2 -index 4 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging +case b: maintenance: index on Varchar(5) + Varchar(5) + Varchar(20) + Unsigned +testOIBasic -case u -table 2 -index 5 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging +case c: full scan: index on PK Unsigned +testOIBasic -case v -table 1 -index 1 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging + +case d: scan 1 tuple via EQ: index on PK Unsigned +testOIBasic -case w -table 1 -index 1 -fragtype small -threads 10 -rows 100000 -samples 10000 -subloop 1 -nologging -v2 + +a, b 1 million rows, pk update without index, pk update with index shows ms / 1000 rows for each and pct overhead -the figures are based on single run on idle machine + +c +1 million rows, index on PK, full table scan, full index scan +shows ms / 1000 rows for each and index time overhead + +d +1 million rows, index on PK, read table via each pk, scan index for each pk +shows ms / 1000 rows for each and index time overhead +samples 10% of all PKs (100,000 pk reads, 100,000 scans) 040616 mc02/a 40 ms 87 ms 114 pct mc02/b 51 ms 128 ms 148 pct @@ -49,4 +64,22 @@ optim 10 mc02/a 44 ms 65 ms 46 pct optim 11 mc02/a 43 ms 63 ms 46 pct mc02/b 52 ms 86 ms 63 pct +optim 12 mc02/a 38 ms 55 ms 43 pct + mc02/b 47 ms 77 ms 63 pct + mc02/c 10 ms 14 ms 47 pct + mc02/d 176 ms 281 ms 59 pct + +optim 13 mc02/a 40 ms 57 ms 42 pct + mc02/b 47 ms 77 ms 61 pct + mc02/c 9 ms 13 ms 50 pct + mc02/d 170 ms 256 ms 50 pct + +after wl-1884 store all-NULL keys (the tests have pctnull=10 per column) +[ what happened to PK read performance? ] + +optim 13 mc02/a 39 ms 59 ms 50 pct + mc02/b 47 ms 77 ms 61 pct + mc02/c 9 ms 12 ms 44 pct + mc02/d 246 ms 289 ms 17 pct + vim: set et: diff --git a/ndb/src/mgmapi/Makefile.am b/ndb/src/mgmapi/Makefile.am index e4fa1d449c6..bf209ddccb5 100644 --- a/ndb/src/mgmapi/Makefile.am +++ b/ndb/src/mgmapi/Makefile.am @@ -9,5 +9,10 @@ DEFS_LOC = -DNO_DEBUG_MESSAGES include $(top_srcdir)/ndb/config/common.mk.am include $(top_srcdir)/ndb/config/type_util.mk.am +#ndbtest_PROGRAMS = ndb_test_mgmapi +ndb_test_mgmapi_SOURCES = test_mgmapi.cpp +ndb_test_mgmapi_LDFLAGS = @ndb_bin_am_ldflags@ \ + $(top_builddir)/ndb/src/libndbclient.la + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index ca77ae9fb63..ccc63cc7e70 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -172,7 +172,7 @@ MgmtSrvr::signalRecvThreadRun() siglist.push_back(SigMatch(GSN_MGM_UNLOCK_CONFIG_REQ, &MgmtSrvr::handle_MGM_UNLOCK_CONFIG_REQ)); - while(1) { + while(!_isStopThread) { SigMatch *handler = NULL; NdbApiSignal *signal = NULL; if(m_signalRecvQueue.waitFor(siglist, handler, signal)) { @@ -413,14 +413,18 @@ MgmtSrvr::getPort() const { ndbout << "Local node id " << getOwnNodeId() << " is not defined as management server" << endl << "Have you set correct NodeId for this node?" << endl; + ndb_mgm_destroy_iterator(iter); return 0; } Uint32 port = 0; if(ndb_mgm_get_int_parameter(iter, CFG_MGM_PORT, &port) != 0){ ndbout << "Could not find PortNumber in the configuration file." << endl; + ndb_mgm_destroy_iterator(iter); return 0; } + + ndb_mgm_destroy_iterator(iter); /***************** * Set Stat Port * @@ -515,6 +519,7 @@ MgmtSrvr::MgmtSrvr(NodeId nodeId, _isStopThread = false; _logLevelThread = NULL; _logLevelThreadSleep = 500; + m_signalRecvThread = NULL; _startedNodeId = 0; theFacade = 0; @@ -696,6 +701,11 @@ MgmtSrvr::~MgmtSrvr() NdbThread_WaitFor(_logLevelThread, &res); NdbThread_Destroy(&_logLevelThread); } + + if (m_signalRecvThread != NULL) { + NdbThread_WaitFor(m_signalRecvThread, &res); + NdbThread_Destroy(&m_signalRecvThread); + } } //**************************************************************************** diff --git a/ndb/src/ndbapi/NdbScanOperation.cpp b/ndb/src/ndbapi/NdbScanOperation.cpp index 603ae85ad65..7d51974da7c 100644 --- a/ndb/src/ndbapi/NdbScanOperation.cpp +++ b/ndb/src/ndbapi/NdbScanOperation.cpp @@ -1120,7 +1120,6 @@ NdbIndexScanOperation::setBound(const NdbColumnImpl* tAttrInfo, if (theOperationType == OpenRangeScanRequest && theStatus == SetBound && (0 <= type && type <= 4) && - aValue != NULL && len <= 8000) { // bound type @@ -1131,20 +1130,22 @@ NdbIndexScanOperation::setBound(const NdbColumnImpl* tAttrInfo, setErrorCodeAbort(4209); return -1; } - len = sizeInBytes; + len = aValue != NULL ? sizeInBytes : 0; Uint32 tIndexAttrId = tAttrInfo->m_attrId; Uint32 sizeInWords = (len + 3) / 4; AttributeHeader ah(tIndexAttrId, sizeInWords); insertATTRINFO(ah.m_value); - // attribute data - if ((UintPtr(aValue) & 0x3) == 0 && (len & 0x3) == 0) - insertATTRINFOloop((const Uint32*)aValue, sizeInWords); - else { - Uint32 temp[2000]; - memcpy(temp, aValue, len); - while ((len & 0x3) != 0) - ((char*)temp)[len++] = 0; - insertATTRINFOloop(temp, sizeInWords); + if (len != 0) { + // attribute data + if ((UintPtr(aValue) & 0x3) == 0 && (len & 0x3) == 0) + insertATTRINFOloop((const Uint32*)aValue, sizeInWords); + else { + Uint32 temp[2000]; + memcpy(temp, aValue, len); + while ((len & 0x3) != 0) + ((char*)temp)[len++] = 0; + insertATTRINFOloop(temp, sizeInWords); + } } /** @@ -1231,7 +1232,7 @@ NdbIndexScanOperation::compare(Uint32 skip, Uint32 cols, Uint32 * d2 = (Uint32*)r2->aRef(); unsigned r1_null = r1->isNULL(); if((r1_null ^ (unsigned)r2->isNULL())){ - return (r1_null ? 1 : -1); + return (r1_null ? -1 : 1); } Uint32 type = NdbColumnImpl::getImpl(* r1->m_column).m_extType; Uint32 size = (r1->theAttrSize * r1->theArraySize + 3) / 4; diff --git a/ndb/src/ndbapi/Ndbif.cpp b/ndb/src/ndbapi/Ndbif.cpp index ee59e661cfb..7ad37401b9a 100644 --- a/ndb/src/ndbapi/Ndbif.cpp +++ b/ndb/src/ndbapi/Ndbif.cpp @@ -350,47 +350,46 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) return; } - case GSN_TRANSID_AI: - { - tFirstDataPtr = int2void(tFirstData); - assert(tFirstDataPtr); - if (tFirstDataPtr == 0) goto InvalidSignal; - NdbReceiver* tRec = void2rec(tFirstDataPtr); - assert(tRec->checkMagicNumber()); - assert(tRec->getTransaction()); - assert(tRec->getTransaction()->checkState_TransId(((const TransIdAI*)tDataPtr)->transId)); - if(tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) && - tCon->checkState_TransId(((const TransIdAI*)tDataPtr)->transId)){ - Uint32 com; - if(aSignal->m_noOfSections > 0){ - com = tRec->execTRANSID_AI(ptr[0].p, ptr[0].sz); - } else { - com = tRec->execTRANSID_AI(tDataPtr + TransIdAI::HeaderLength, - tLen - TransIdAI::HeaderLength); - } - - if(com == 1){ - switch(tRec->getType()){ - case NdbReceiver::NDB_OPERATION: - case NdbReceiver::NDB_INDEX_OPERATION: - if(tCon->OpCompleteSuccess() != -1){ - completedTransaction(tCon); - return; - } - break; - case NdbReceiver::NDB_SCANRECEIVER: - tCon->theScanningOp->receiver_delivered(tRec); - theWaiter.m_state = (tWaitState == WAIT_SCAN? NO_WAIT: tWaitState); - break; - default: - goto InvalidSignal; + case GSN_TRANSID_AI:{ + tFirstDataPtr = int2void(tFirstData); + NdbReceiver* tRec; + if (tFirstDataPtr && (tRec = void2rec(tFirstDataPtr)) && + tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) && + tCon->checkState_TransId(((const TransIdAI*)tDataPtr)->transId)){ + Uint32 com; + if(aSignal->m_noOfSections > 0){ + com = tRec->execTRANSID_AI(ptr[0].p, ptr[0].sz); + } else { + com = tRec->execTRANSID_AI(tDataPtr + TransIdAI::HeaderLength, + tLen - TransIdAI::HeaderLength); + } + + if(com == 1){ + switch(tRec->getType()){ + case NdbReceiver::NDB_OPERATION: + case NdbReceiver::NDB_INDEX_OPERATION: + if(tCon->OpCompleteSuccess() != -1){ + completedTransaction(tCon); + return; } + break; + case NdbReceiver::NDB_SCANRECEIVER: + tCon->theScanningOp->receiver_delivered(tRec); + theWaiter.m_state = (tWaitState == WAIT_SCAN ? NO_WAIT : tWaitState); + break; + default: + goto InvalidSignal; } - break; - } else { - goto InvalidSignal; } + break; + } else { + /** + * This is ok as transaction can have been aborted before TRANSID_AI + * arrives (if TUP on other node than TC) + */ + return; } + } case GSN_TCKEY_FAILCONF: { tFirstDataPtr = int2void(tFirstData); @@ -695,7 +694,8 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) (tCon = void2con(tFirstDataPtr)) && (tCon->checkMagicNumber() == 0)){ if(aSignal->m_noOfSections > 0){ - tReturnCode = tCon->receiveSCAN_TABCONF(aSignal, ptr[0].p, ptr[0].sz); + tReturnCode = tCon->receiveSCAN_TABCONF(aSignal, + ptr[0].p, ptr[0].sz); } else { tReturnCode = tCon->receiveSCAN_TABCONF(aSignal, @@ -730,12 +730,11 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) } case GSN_KEYINFO20: { tFirstDataPtr = int2void(tFirstData); - if (tFirstDataPtr == 0) goto InvalidSignal; - NdbReceiver* tRec = void2rec(tFirstDataPtr); - - if(tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) && - tCon->checkState_TransId(&((const KeyInfo20*)tDataPtr)->transId1)){ - + NdbReceiver* tRec; + if (tFirstDataPtr && (tRec = void2rec(tFirstDataPtr)) && + tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) && + tCon->checkState_TransId(&((const KeyInfo20*)tDataPtr)->transId1)){ + Uint32 len = ((const KeyInfo20*)tDataPtr)->keyLen; Uint32 info = ((const KeyInfo20*)tDataPtr)->scanInfo_Node; int com = -1; @@ -756,8 +755,13 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) goto InvalidSignal; } break; + } else { + /** + * This is ok as transaction can have been aborted before KEYINFO20 + * arrives (if TUP on other node than TC) + */ + return; } - goto InvalidSignal; } case GSN_TCINDXCONF:{ tFirstDataPtr = int2void(tFirstData); |