From 52a014c7c646f4b4fa5c3117675671c6668cd1ac Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 13 Aug 2007 09:22:42 +0200 Subject: ndb - bug#28804 Handle out of transaction buffer in TC for INDX lookups ndb/src/kernel/blocks/ERROR_codes.txt: Add new error codes for simulating out of transaction buffer memory ndb/src/kernel/blocks/dbtc/Dbtc.hpp: Change signature to handle out of buffer ndb/src/kernel/blocks/dbtc/DbtcMain.cpp: Handle out of transaction buffers in index operations (TCINDXREQ++) ndb/src/ndbapi/NdbTransaction.cpp: Give more info on 4012 ndb/src/ndbapi/ndberror.c: Add new error code ndb/test/ndbapi/testIndex.cpp: add tests ndb/test/run-test/daily-basic-tests.txt: add tests sql/ha_ndbcluster.cc: Set correct status --- ndb/src/kernel/blocks/ERROR_codes.txt | 6 +- ndb/src/kernel/blocks/dbtc/Dbtc.hpp | 10 +- ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 163 ++++++++++++++++++++++++-------- ndb/src/ndbapi/NdbTransaction.cpp | 38 ++++++-- ndb/src/ndbapi/ndberror.c | 2 + ndb/test/ndbapi/testIndex.cpp | 117 +++++++++++++++++++++++ ndb/test/run-test/daily-basic-tests.txt | 8 ++ 7 files changed, 291 insertions(+), 53 deletions(-) (limited to 'ndb') diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index 17f2c35624a..e45c608b601 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -6,7 +6,7 @@ Next DBTUP 4014 Next DBLQH 5043 Next DBDICT 6007 Next DBDIH 7183 -Next DBTC 8039 +Next DBTC 8052 Next CMVMI 9000 Next BACKUP 10022 Next DBUTIL 11002 @@ -296,6 +296,10 @@ ABORT OF TCKEYREQ 8038 : Simulate API disconnect just after SCAN_TAB_REQ +8039 : Simulate failure of TransactionBufferMemory allocation for OI lookup + +8051 : Simulate failure of allocation for saveINDXATTRINFO + CMVMI ----- diff --git a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp index 6934de76ad3..710d2fde182 100644 --- a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp +++ 
b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp @@ -1497,12 +1497,12 @@ private: void clearCommitAckMarker(ApiConnectRecord * const regApiPtr, TcConnectRecord * const regTcPtr); // Trigger and index handling - bool saveINDXKEYINFO(Signal* signal, - TcIndexOperation* indexOp, - const Uint32 *src, - Uint32 len); + int saveINDXKEYINFO(Signal* signal, + TcIndexOperation* indexOp, + const Uint32 *src, + Uint32 len); bool receivedAllINDXKEYINFO(TcIndexOperation* indexOp); - bool saveINDXATTRINFO(Signal* signal, + int saveINDXATTRINFO(Signal* signal, TcIndexOperation* indexOp, const Uint32 *src, Uint32 len); diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index e2df1249661..60024e82978 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -1789,9 +1789,18 @@ start_failure: }//switch } +static +inline +bool +compare_transid(Uint32* val0, Uint32* val1) +{ + Uint32 tmp0 = val0[0] ^ val1[0]; + Uint32 tmp1 = val0[1] ^ val1[1]; + return (tmp0 | tmp1) == 0; +} + void Dbtc::execKEYINFO(Signal* signal) { - UintR compare_transid1, compare_transid2; jamEntry(); apiConnectptr.i = signal->theData[0]; tmaxData = 20; @@ -1801,10 +1810,8 @@ void Dbtc::execKEYINFO(Signal* signal) }//if ptrAss(apiConnectptr, apiConnectRecord); ttransid_ptr = 1; - compare_transid1 = apiConnectptr.p->transid[0] ^ signal->theData[1]; - compare_transid2 = apiConnectptr.p->transid[1] ^ signal->theData[2]; - compare_transid1 = compare_transid1 | compare_transid2; - if (compare_transid1 != 0) { + if (compare_transid(apiConnectptr.p->transid, signal->theData+1) == false) + { TCKEY_abort(signal, 19); return; }//if @@ -2105,7 +2112,6 @@ void Dbtc::saveAttrbuf(Signal* signal) void Dbtc::execATTRINFO(Signal* signal) { - UintR compare_transid1, compare_transid2; UintR Tdata1 = signal->theData[0]; UintR Tlength = signal->length(); UintR TapiConnectFilesize = capiConnectFilesize; @@ -2120,17 +2126,13 @@ void Dbtc::execATTRINFO(Signal* 
signal) return; }//if - UintR Tdata2 = signal->theData[1]; - UintR Tdata3 = signal->theData[2]; ApiConnectRecord * const regApiPtr = &localApiConnectRecord[Tdata1]; - compare_transid1 = regApiPtr->transid[0] ^ Tdata2; - compare_transid2 = regApiPtr->transid[1] ^ Tdata3; apiConnectptr.p = regApiPtr; - compare_transid1 = compare_transid1 | compare_transid2; - if (compare_transid1 != 0) { + if (compare_transid(regApiPtr->transid, signal->theData+1) == false) + { DEBUG("Drop ATTRINFO, wrong transid, lenght="<theData[1]<<", "<theData[2]); TCKEY_abort(signal, 19); return; }//if @@ -5456,11 +5458,32 @@ void Dbtc::execTC_COMMITREQ(Signal* signal) } }//Dbtc::execTC_COMMITREQ() +/** + * TCROLLBACKREQ + * + * Format is: + * + * thedata[0] = apiconnectptr + * thedata[1] = transid[0] + * thedata[2] = transid[1] + * OPTIONAL thedata[3] = flags + * + * Flags: + * 0x1 = potentiallyBad data from API (try not to assert) + */ void Dbtc::execTCROLLBACKREQ(Signal* signal) { + bool potentiallyBad= false; UintR compare_transid1, compare_transid2; jamEntry(); + + if(unlikely((signal->getLength() >= 4) && (signal->theData[3] & 0x1))) + { + ndbout_c("Trying to roll back potentially bad txn\n"); + potentiallyBad= true; + } + apiConnectptr.i = signal->theData[0]; if (apiConnectptr.i >= capiConnectFilesize) { goto TC_ROLL_warning; @@ -5547,12 +5570,14 @@ void Dbtc::execTCROLLBACKREQ(Signal* signal) TC_ROLL_warning: jam(); - warningHandlerLab(signal, __LINE__); + if(likely(potentiallyBad==false)) + warningHandlerLab(signal, __LINE__); return; TC_ROLL_system_error: jam(); - systemErrorLab(signal, __LINE__); + if(likely(potentiallyBad==false)) + systemErrorLab(signal, __LINE__); return; }//Dbtc::execTCROLLBACKREQ() @@ -11559,6 +11584,7 @@ void Dbtc::execTCINDXREQ(Signal* signal) // This is a newly started transaction, clean-up releaseAllSeizedIndexOperations(regApiPtr); + regApiPtr->apiConnectstate = CS_STARTED; regApiPtr->transid[0] = tcIndxReq->transId1; regApiPtr->transid[1] = 
tcIndxReq->transId2; }//if @@ -11599,20 +11625,29 @@ void Dbtc::execTCINDXREQ(Signal* signal) Uint32 includedIndexLength = MIN(indexLength, indexBufSize); indexOp->expectedAttrInfo = attrLength; Uint32 includedAttrLength = MIN(attrLength, attrBufSize); - if (saveINDXKEYINFO(signal, - indexOp, - dataPtr, - includedIndexLength)) { + + int ret; + if ((ret = saveINDXKEYINFO(signal, + indexOp, + dataPtr, + includedIndexLength)) == 0) + { jam(); // We have received all we need readIndexTable(signal, regApiPtr, indexOp); return; } + else if (ret == -1) + { + jam(); + return; + } + dataPtr += includedIndexLength; if (saveINDXATTRINFO(signal, indexOp, dataPtr, - includedAttrLength)) { + includedAttrLength) == 0) { jam(); // We have received all we need readIndexTable(signal, regApiPtr, indexOp); @@ -11715,13 +11750,25 @@ void Dbtc::execINDXKEYINFO(Signal* signal) TcIndexOperationPtr indexOpPtr; TcIndexOperation* indexOp; + if (compare_transid(regApiPtr->transid, indxKeyInfo->transId) == false) + { + TCKEY_abort(signal, 19); + return; + } + + if (regApiPtr->apiConnectstate == CS_ABORTING) + { + jam(); + return; + } + if((indexOpPtr.i = regApiPtr->accumulatingIndexOp) != RNIL) { indexOp = c_theIndexOperationPool.getPtr(indexOpPtr.i); if (saveINDXKEYINFO(signal, indexOp, src, - keyInfoLength)) { + keyInfoLength) == 0) { jam(); // We have received all we need readIndexTable(signal, regApiPtr, indexOp); @@ -11748,17 +11795,31 @@ void Dbtc::execINDXATTRINFO(Signal* signal) TcIndexOperationPtr indexOpPtr; TcIndexOperation* indexOp; + if (compare_transid(regApiPtr->transid, indxAttrInfo->transId) == false) + { + TCKEY_abort(signal, 19); + return; + } + + if (regApiPtr->apiConnectstate == CS_ABORTING) + { + jam(); + return; + } + if((indexOpPtr.i = regApiPtr->accumulatingIndexOp) != RNIL) { indexOp = c_theIndexOperationPool.getPtr(indexOpPtr.i); if (saveINDXATTRINFO(signal, indexOp, src, - attrInfoLength)) { + attrInfoLength) == 0) { jam(); // We have received all we need 
readIndexTable(signal, regApiPtr, indexOp); + return; } + return; } } @@ -11766,12 +11827,13 @@ void Dbtc::execINDXATTRINFO(Signal* signal) * Save signal INDXKEYINFO * Return true if we have received all needed data */ -bool Dbtc::saveINDXKEYINFO(Signal* signal, - TcIndexOperation* indexOp, - const Uint32 *src, - Uint32 len) +int +Dbtc::saveINDXKEYINFO(Signal* signal, + TcIndexOperation* indexOp, + const Uint32 *src, + Uint32 len) { - if (!indexOp->keyInfo.append(src, len)) { + if (ERROR_INSERTED(8039) || !indexOp->keyInfo.append(src, len)) { jam(); // Failed to seize keyInfo, abort transaction #ifdef VM_TRACE @@ -11781,15 +11843,17 @@ bool Dbtc::saveINDXKEYINFO(Signal* signal, apiConnectptr.i = indexOp->connectionIndex; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); releaseIndexOperation(apiConnectptr.p, indexOp); - terrorCode = 4000; + terrorCode = 289; + if(TcKeyReq::getExecuteFlag(indexOp->tcIndxReq.requestInfo)) + apiConnectptr.p->m_exec_flag= 1; abortErrorLab(signal); - return false; + return -1; } if (receivedAllINDXKEYINFO(indexOp) && receivedAllINDXATTRINFO(indexOp)) { jam(); - return true; + return 0; } - return false; + return 1; } bool Dbtc::receivedAllINDXKEYINFO(TcIndexOperation* indexOp) @@ -11801,12 +11865,13 @@ bool Dbtc::receivedAllINDXKEYINFO(TcIndexOperation* indexOp) * Save signal INDXATTRINFO * Return true if we have received all needed data */ -bool Dbtc::saveINDXATTRINFO(Signal* signal, - TcIndexOperation* indexOp, - const Uint32 *src, - Uint32 len) +int +Dbtc::saveINDXATTRINFO(Signal* signal, + TcIndexOperation* indexOp, + const Uint32 *src, + Uint32 len) { - if (!indexOp->attrInfo.append(src, len)) { + if (ERROR_INSERTED(8051) || !indexOp->attrInfo.append(src, len)) { jam(); #ifdef VM_TRACE ndbout_c("Dbtc::saveINDXATTRINFO: Failed to seize attrInfo\n"); @@ -11814,15 +11879,17 @@ bool Dbtc::saveINDXATTRINFO(Signal* signal, apiConnectptr.i = indexOp->connectionIndex; ptrCheckGuard(apiConnectptr, capiConnectFilesize, 
apiConnectRecord); releaseIndexOperation(apiConnectptr.p, indexOp); - terrorCode = 4000; + terrorCode = 289; + if(TcKeyReq::getExecuteFlag(indexOp->tcIndxReq.requestInfo)) + apiConnectptr.p->m_exec_flag= 1; abortErrorLab(signal); - return false; + return -1; } if (receivedAllINDXKEYINFO(indexOp) && receivedAllINDXATTRINFO(indexOp)) { jam(); - return true; + return 0; } - return false; + return 1; } bool Dbtc::receivedAllINDXATTRINFO(TcIndexOperation* indexOp) @@ -12006,6 +12073,9 @@ void Dbtc::execTCKEYREF(Signal* signal) tcIndxRef->transId[0] = tcKeyRef->transId[0]; tcIndxRef->transId[1] = tcKeyRef->transId[1]; tcIndxRef->errorCode = tcKeyRef->errorCode; + + releaseIndexOperation(regApiPtr, indexOp); + sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, TcKeyRef::SignalLength, JBB); return; @@ -12538,7 +12608,18 @@ void Dbtc::executeIndexOperation(Signal* signal, bool Dbtc::seizeIndexOperation(ApiConnectRecord* regApiPtr, TcIndexOperationPtr& indexOpPtr) { - return regApiPtr->theSeizedIndexOperations.seize(indexOpPtr); + if (regApiPtr->theSeizedIndexOperations.seize(indexOpPtr)) + { + ndbassert(indexOpPtr.p->expectedKeyInfo == 0); + ndbassert(indexOpPtr.p->keyInfo.getSize() == 0); + ndbassert(indexOpPtr.p->expectedAttrInfo == 0); + ndbassert(indexOpPtr.p->attrInfo.getSize() == 0); + ndbassert(indexOpPtr.p->expectedTransIdAI == 0); + ndbassert(indexOpPtr.p->transIdAI.getSize() == 0); + return true; + } + + return false; } void Dbtc::releaseIndexOperation(ApiConnectRecord* regApiPtr, diff --git a/ndb/src/ndbapi/NdbTransaction.cpp b/ndb/src/ndbapi/NdbTransaction.cpp index f5076ff2020..1ebc5b7ef24 100644 --- a/ndb/src/ndbapi/NdbTransaction.cpp +++ b/ndb/src/ndbapi/NdbTransaction.cpp @@ -481,12 +481,27 @@ NdbTransaction::executeNoBlobs(ExecType aTypeOfExec, while (1) { int noOfComp = tNdb->sendPollNdb(3 * timeout, 1, forceSend); if (noOfComp == 0) { - /** - * This timeout situation can occur if NDB crashes. 
+ /* + * Just for fun, this is only one of two places where + * we could hit this error... It's quite possible we + * hit it in Ndbif.cpp in Ndb::check_send_timeout() + * + * We behave rather similarly in both places. + * Hitting this is certainly a bug though... */ - ndbout << "This timeout should never occur, execute(..)" << endl; - theError.code = 4012; - setOperationErrorCodeAbort(4012); // Error code for "Cluster Failure" + g_eventLogger.error("WARNING: Timeout in executeNoBlobs() waiting for " + "response from NDB data nodes. This should NEVER " + "occur. You have likely hit a NDB Bug. Please " + "file a bug."); + DBUG_PRINT("error",("This timeout should never occure, execute()")); + g_eventLogger.error("Forcibly trying to rollback txn (%p" + ") to try to clean up data node resources.", + this); + executeNoBlobs(NdbTransaction::Rollback); + theError.code = 4012; + theError.status= NdbError::PermanentError; + theError.classification= NdbError::TimeoutExpired; + setOperationErrorCodeAbort(4012); // ndbd timeout DBUG_RETURN(-1); }//if @@ -550,7 +565,12 @@ NdbTransaction::executeAsynchPrepare( ExecType aTypeOfExec, */ if (theError.code != 0) DBUG_PRINT("enter", ("Resetting error %d on execute", theError.code)); - theError.code = 0; + /** + * for timeout (4012) we want sendROLLBACK to behave differently. 
+ * Else, normal behaviour of reset errcode + */ + if (theError.code != 4012) + theError.code = 0; NdbScanOperation* tcOp = m_theFirstScanOperation; if (tcOp != 0){ // Execute any cursor operations @@ -873,6 +893,12 @@ NdbTransaction::sendROLLBACK() // Send a TCROLLBACKREQ signal; tSignal.setData(theTCConPtr, 1); tSignal.setData(tTransId1, 2); tSignal.setData(tTransId2, 3); + if(theError.code == 4012) + { + g_eventLogger.error("Sending TCROLLBACKREQ with Bad flag"); + tSignal.setLength(tSignal.getLength() + 1); // + flags + tSignal.setData(0x1, 4); // potentially bad data + } tReturnCode = tp->sendSignal(&tSignal,theDBnode); if (tReturnCode != -1) { theSendStatus = sendTC_ROLLBACK; diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index 328b0688857..24ccb1d07c2 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -173,6 +173,8 @@ ErrorBundle ErrorCodes[] = { { 4022, TR, "Out of Send Buffer space in NDB API" }, { 4032, TR, "Out of Send Buffer space in NDB API" }, { 288, TR, "Out of index operations in transaction coordinator (increase MaxNoOfConcurrentIndexOperations)" }, + { 289, TR, "Out of transaction buffer memory in TC (increase TransactionBufferMemory)" }, + /** * InsufficientSpace */ diff --git a/ndb/test/ndbapi/testIndex.cpp b/ndb/test/ndbapi/testIndex.cpp index 78672cd519f..f715db1ef8c 100644 --- a/ndb/test/ndbapi/testIndex.cpp +++ b/ndb/test/ndbapi/testIndex.cpp @@ -1297,6 +1297,102 @@ runBug25059(NDBT_Context* ctx, NDBT_Step* step) return res; } +int tcSaveINDX_test(NDBT_Context* ctx, NDBT_Step* step, int inject_err) +{ + int result= NDBT_OK; + Ndb* pNdb = GETNDB(step); + NdbDictionary::Dictionary * dict = pNdb->getDictionary(); + const NdbDictionary::Index * idx = dict->getIndex(pkIdxName, *ctx->getTab()); + + HugoOperations ops(*ctx->getTab(), idx); + + g_err << "Using INDEX: " << pkIdxName << endl; + + NdbRestarter restarter; + + int loops = ctx->getNumLoops(); + const int rows = ctx->getNumRecords(); + const int 
batchsize = ctx->getProperty("BatchSize", 1); + + for(int bs=1; bs < loops; bs++) + { + int c= 0; + while (c++ < loops) + { + g_err << "BS " << bs << " LOOP #" << c << endl; + + g_err << "inserting error on op#" << c << endl; + + CHECK(ops.startTransaction(pNdb) == 0); + for(int i=1;i<=c;i++) + { + if(i==c) + { + if(restarter.insertErrorInAllNodes(inject_err)!=0) + { + g_err << "**** FAILED to insert error" << endl; + result= NDBT_FAILED; + break; + } + } + CHECK(ops.indexReadRecords(pNdb, pkIdxName, i,false,1) == 0); + if(i%bs==0 || i==c) + { + if(i