summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author unknown <tomas@poseidon.ndb.mysql.com> 2006-01-30 11:09:55 +0100
committer unknown <tomas@poseidon.ndb.mysql.com> 2006-01-30 11:09:55 +0100
commit 1dc94a9dcf0ffadd4199c16b935b99ee6012d6e6 (patch)
tree 27e94188f8dc23ecad42288eaa3857f0447beffb
parent 43f6f1b9d5175c6f8d46e9a859219e7c520bf62c (diff)
download mariadb-git-1dc94a9dcf0ffadd4199c16b935b99ee6012d6e6.tar.gz
MySQL Bug #16772: Starting node joins cluster too early; workaround that avoids the issue for dynamically allocated node ids
storage/ndb/include/kernel/signaldata/AllocNodeId.hpp: New BitKeeper file ``storage/ndb/include/kernel/signaldata/AllocNodeId.hpp''
-rw-r--r-- storage/ndb/include/kernel/GlobalSignalNumbers.h 6
-rw-r--r-- storage/ndb/include/kernel/signaldata/AllocNodeId.hpp 65
-rw-r--r-- storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp 3
-rw-r--r-- storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 18
-rw-r--r-- storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 5
-rw-r--r-- storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 164
-rw-r--r-- storage/ndb/src/mgmsrv/MgmtSrvr.cpp 116
-rw-r--r-- storage/ndb/src/mgmsrv/MgmtSrvr.hpp 3
-rw-r--r-- storage/ndb/src/ndbapi/ndberror.c 10
9 files changed, 386 insertions, 4 deletions
diff --git a/storage/ndb/include/kernel/GlobalSignalNumbers.h b/storage/ndb/include/kernel/GlobalSignalNumbers.h
index b05b79cf176..c3acd30369f 100644
--- a/storage/ndb/include/kernel/GlobalSignalNumbers.h
+++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h
@@ -111,9 +111,9 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES;
/* 57 unused */
/* 58 unused */
/* 59 unused */
-/* 60 unused */
-/* 61 unused */
-/* 62 unused */
+#define GSN_ALLOC_NODEID_REQ 60 /* node id allocation request, data: AllocNodeIdReq */
+#define GSN_ALLOC_NODEID_CONF 61 /* node id allocation accepted, data: AllocNodeIdConf */
+#define GSN_ALLOC_NODEID_REF 62 /* node id allocation refused, data: AllocNodeIdRef */
/* 63 unused */
/* 64 unused */
/* 65 unused */
diff --git a/storage/ndb/include/kernel/signaldata/AllocNodeId.hpp b/storage/ndb/include/kernel/signaldata/AllocNodeId.hpp
new file mode 100644
index 00000000000..40b30a573e1
--- /dev/null
+++ b/storage/ndb/include/kernel/signaldata/AllocNodeId.hpp
@@ -0,0 +1,65 @@
+/* Copyright (C) 2003 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef ALLOC_NODE_ID_HPP
+#define ALLOC_NODE_ID_HPP
+
+#include "SignalData.hpp"
+#include <NodeBitmask.hpp>
+
+/**
+ * Request to allocate a node id.
+ *
+ * Sent by the management server (MgmtSrvr::alloc_node_id_req) to QMGR,
+ * and re-sent by the receiving QMGR to the QMGR block of the cluster
+ * nodes (see Qmgr::execALLOC_NODEID_REQ).
+ */
+class AllocNodeIdReq {
+public:
+ STATIC_CONST( SignalLength = 3 ); // one word per field below
+
+ Uint32 senderRef; // block reference of the sender; the reply is sent here
+ Uint32 senderData; // sender-chosen value, copied into the final CONF/REF
+ Uint32 nodeId; // node id that should be allocated
+};
+
+/**
+ * Positive reply to AllocNodeIdReq: the sender accepts that the
+ * node id is allocated.
+ */
+class AllocNodeIdConf {
+public:
+ STATIC_CONST( SignalLength = 3 ); // one word per field below
+
+ Uint32 senderRef; // block reference of the replying QMGR
+ Uint32 senderData; // senderData of the request being answered
+ Uint32 nodeId; // node id the request asked for
+};
+
+/**
+ * Negative reply to AllocNodeIdReq: the node id must not be
+ * allocated (or the request has to be retried, see errorCode).
+ */
+class AllocNodeIdRef {
+public:
+ STATIC_CONST( SignalLength = 5 ); // one word per field below
+
+ enum ErrorCodes {
+ NoError = 0,
+ // NOTE(review): ndberror.c registers 1700 (not 1) as "Undefined error" - confirm
+ Undefined = 1,
+ NF_FakeErrorREF = 11, // not counted as a refusal (Qmgr::execALLOC_NODEID_REF uses ignoreRef)
+ Busy = 701, // receiver is still processing an earlier allocation request
+ NotMaster = 702, // receiver is not the president; retry at masterRef
+ NodeReserved = 1701, // "Node already reserved" in ndberror.c
+ NodeConnected = 1702, // requested node id is currently connected
+ NodeFailureHandlingNotCompleted = 1703 // failState of the node id is not NORMAL
+ };
+
+ Uint32 senderRef; // block reference of the replying QMGR
+ Uint32 senderData; // senderData of the request being answered
+ Uint32 nodeId; // node id the request asked for
+ Uint32 errorCode; // one of ErrorCodes
+ Uint32 masterRef; // QMGR reference of the president as known by the sender
+};
+#endif
diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
index d2f4c7d57cc..86c9aa36b94 100644
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
@@ -1509,6 +1509,9 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
sendSignal(SUMA_REF, GSN_NODE_FAILREP, signal,
NodeFailRep::SignalLength, JBB);
+ // Also forward the failure report to QMGR, whose handler
+ // (Qmgr::execNODE_FAILREP) passes it on to its counter manager.
+ sendSignal(QMGR_REF, GSN_NODE_FAILREP, signal,
+ NodeFailRep::SignalLength, JBB);
+
Uint32 nodeId = 0;
while(!allFailed.isclear()){
nodeId = allFailed.find(nodeId + 1);
diff --git a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
index 4a17d56d31e..b2202122aa1 100644
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
@@ -29,6 +29,10 @@
#include <signaldata/CmRegSignalData.hpp>
#include <signaldata/ApiRegSignalData.hpp>
#include <signaldata/FailRep.hpp>
+#include <signaldata/AllocNodeId.hpp>
+
+#include <SafeCounter.hpp>
+#include <RequestTracker.hpp>
#include "timer.hpp"
@@ -222,6 +226,12 @@ private:
void execAPI_VERSION_REQ(Signal* signal);
void execAPI_BROADCAST_REP(Signal* signal);
+ // Node failure report and node id allocation signals
+ void execNODE_FAILREP(Signal *);
+ void execALLOC_NODEID_REQ(Signal *);
+ void execALLOC_NODEID_CONF(Signal *);
+ void execALLOC_NODEID_REF(Signal *);
+ // Sends the final CONF/REF to the requester once all replies are in
+ void completeAllocNodeIdReq(Signal *);
+
// Arbitration signals
void execARBIT_CFG(Signal* signal);
void execARBIT_PREPREQ(Signal* signal);
@@ -388,6 +398,14 @@ private:
Uint16 cprepFailedNodes[MAX_NDB_NODES];
Uint16 ccommitFailedNodes[MAX_NDB_NODES];
+ /**
+ * State of the node id allocation request currently being handled.
+ * There is a single instance (opAllocNodeIdReq); a new request is
+ * answered with Busy while m_tracker is not done.
+ */
+ struct OpAllocNodeIdReq {
+ RequestTracker m_tracker; // outstanding CONF/REF replies to the forwarded request
+ AllocNodeIdReq m_req; // copy of the original request (sender, senderData, nodeId)
+ Uint32 m_connectCount; // connect count of the requesting node when the request arrived
+ Uint32 m_error; // first error code received in an ALLOC_NODEID_REF, 0 if none
+ };
+
+ struct OpAllocNodeIdReq opAllocNodeIdReq;
};
#endif
diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
index 751641ae896..ef3b836d203 100644
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
@@ -85,6 +85,11 @@ Qmgr::Qmgr(const class Configuration & conf)
addRecSignal(GSN_READ_NODESREQ, &Qmgr::execREAD_NODESREQ);
addRecSignal(GSN_SET_VAR_REQ, &Qmgr::execSET_VAR_REQ);
addRecSignal(GSN_API_BROADCAST_REP, &Qmgr::execAPI_BROADCAST_REP);
+
+ // Node failure report and node id allocation signals
+ addRecSignal(GSN_NODE_FAILREP, &Qmgr::execNODE_FAILREP);
+ addRecSignal(GSN_ALLOC_NODEID_REQ, &Qmgr::execALLOC_NODEID_REQ);
+ addRecSignal(GSN_ALLOC_NODEID_CONF, &Qmgr::execALLOC_NODEID_CONF);
+ addRecSignal(GSN_ALLOC_NODEID_REF, &Qmgr::execALLOC_NODEID_REF);
// Arbitration signals
addRecSignal(GSN_ARBIT_PREPREQ, &Qmgr::execARBIT_PREPREQ);
diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
index 0d1c7b8c500..d18375beeef 100644
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
@@ -3984,3 +3984,167 @@ Qmgr::execAPI_BROADCAST_REP(Signal* signal)
NodeReceiverGroup rg(API_CLUSTERMGR, mask);
sendSignal(rg, api.gsn, signal, len, JBB); // forward sections
}
+
+/**
+ * Handle a node failure report (Ndbcntr::execNODE_FAILREP forwards
+ * GSN_NODE_FAILREP to QMGR) by handing it to the counter manager.
+ */
+void
+Qmgr::execNODE_FAILREP(Signal * signal)
+{
+ jamEntry();
+ // Make sure any distributed signals get acknowledged.
+ // NOTE: this call is destructive of the signal, so the signal data
+ // must not be used afterwards.
+ c_counterMgr.execNODE_FAILREP(signal);
+}
+
+/**
+ * Handle a request to allocate a node id.
+ *
+ * Two roles share this handler, told apart by the sender's block:
+ *  - sender is not QMGR (the management server): act as master. Refuse
+ *    with NotMaster/Busy/NodeConnected where applicable, otherwise
+ *    remember the request in opAllocNodeIdReq and forward it to the
+ *    QMGR block of all nodes in c_clusterNodes.
+ *  - sender is QMGR: act as participant. Answer CONF unless the node
+ *    id is connected or its failure handling has not completed.
+ *
+ * Every reply carries the senderData and nodeId of the request.
+ */
+void
+Qmgr::execALLOC_NODEID_REQ(Signal * signal)
+{
+  jamEntry();
+  const AllocNodeIdReq * req = (AllocNodeIdReq*)signal->getDataPtr();
+  // Read everything needed up front: the replies below are written
+  // through getDataPtrSend(), which may overwrite the request words.
+  const Uint32 senderRef = req->senderRef;
+  const Uint32 senderData = req->senderData;
+  const Uint32 nodeId = req->nodeId;
+  Uint32 error = 0;
+
+  if (refToBlock(senderRef) != QMGR) // request from management server
+  {
+    /* master */
+
+    if (getOwnNodeId() != cpresident)
+      error = AllocNodeIdRef::NotMaster;
+    else if (!opAllocNodeIdReq.m_tracker.done())
+      error = AllocNodeIdRef::Busy;
+    else if (c_connectedNodes.get(nodeId))
+      error = AllocNodeIdRef::NodeConnected;
+
+    if (error)
+    {
+      jam();
+      AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
+      ref->senderRef = reference();
+      ref->senderData = senderData;
+      ref->nodeId = nodeId;
+      ref->errorCode = error;
+      // lets the requester retry at the president
+      ref->masterRef = numberToRef(QMGR, cpresident);
+      sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
+                 AllocNodeIdRef::SignalLength, JBB);
+      return;
+    }
+
+    // Keep the original request; completeAllocNodeIdReq() answers it
+    // once every participant has replied.
+    opAllocNodeIdReq.m_req = *req;
+    opAllocNodeIdReq.m_error = 0;
+    opAllocNodeIdReq.m_connectCount =
+      getNodeInfo(refToNode(senderRef)).m_connectCount;
+
+    jam();
+    // Forward the request with this block as sender so that the
+    // participants reply here.
+    AllocNodeIdReq * fwdReq = (AllocNodeIdReq*)signal->getDataPtrSend();
+    fwdReq->senderRef = reference();
+    fwdReq->senderData = senderData;
+    fwdReq->nodeId = nodeId;
+    NodeReceiverGroup rg(QMGR, c_clusterNodes);
+    RequestTracker & p = opAllocNodeIdReq.m_tracker;
+    p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
+
+    sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
+               AllocNodeIdReq::SignalLength, JBB);
+    return;
+  }
+
+  /* participant */
+
+  if (c_connectedNodes.get(nodeId))
+    error = AllocNodeIdRef::NodeConnected;
+  else
+  {
+    NodeRecPtr nodePtr;
+    nodePtr.i = nodeId;
+    ptrAss(nodePtr, nodeRec);
+    if (nodePtr.p->failState != NORMAL)
+      error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
+  }
+
+  if (error)
+  {
+    AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
+    ref->senderRef = reference();
+    ref->senderData = senderData;
+    ref->nodeId = nodeId;
+    ref->errorCode = error;
+    // All SignalLength words are sent, so do not leave this one stale.
+    ref->masterRef = numberToRef(QMGR, cpresident);
+    sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
+               AllocNodeIdRef::SignalLength, JBB);
+    return;
+  }
+
+  AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
+  conf->senderRef = reference();
+  conf->senderData = senderData;
+  conf->nodeId = nodeId;
+  sendSignal(senderRef, GSN_ALLOC_NODEID_CONF, signal,
+             AllocNodeIdConf::SignalLength, JBB);
+}
+
+/**
+ * Master side: one participant accepted the node id allocation.
+ * Registers the confirmation with the request tracker, then lets
+ * completeAllocNodeIdReq() decide whether the request is finished.
+ */
+void
+Qmgr::execALLOC_NODEID_CONF(Signal * signal)
+{
+  jamEntry();
+
+  const AllocNodeIdConf * const confData =
+    (AllocNodeIdConf*)signal->getDataPtr();
+  RequestTracker & tracker = opAllocNodeIdReq.m_tracker;
+
+  tracker.reportConf(c_counterMgr, refToNode(confData->senderRef));
+
+  completeAllocNodeIdReq(signal);
+}
+
+
+/**
+ * Master side: one participant refused the node id allocation.
+ *
+ * A REF carrying NF_FakeErrorREF is only registered with ignoreRef().
+ * Any other REF is registered with reportRef(), and the first such
+ * error code is kept in opAllocNodeIdReq.m_error for the final reply.
+ */
+void
+Qmgr::execALLOC_NODEID_REF(Signal * signal)
+{
+  jamEntry();
+
+  const AllocNodeIdRef * const refData =
+    (AllocNodeIdRef*)signal->getDataPtr();
+  RequestTracker & tracker = opAllocNodeIdReq.m_tracker;
+
+  if (refData->errorCode != AllocNodeIdRef::NF_FakeErrorREF)
+  {
+    tracker.reportRef(c_counterMgr, refToNode(refData->senderRef));
+    // only the first reported error is remembered
+    if (opAllocNodeIdReq.m_error == 0)
+      opAllocNodeIdReq.m_error = refData->errorCode;
+  }
+  else
+  {
+    tracker.ignoreRef(c_counterMgr, refToNode(refData->senderRef));
+  }
+
+  completeAllocNodeIdReq(signal);
+}
+
+/**
+ * Master side: answer the stored allocation request once every
+ * participant has replied.
+ *
+ * Does nothing while replies are outstanding. No reply is sent either
+ * if the connect count of the requesting node differs from the one
+ * recorded when the request arrived. Otherwise sends
+ * ALLOC_NODEID_CONF, or ALLOC_NODEID_REF with the first recorded
+ * error if any participant refused.
+ */
+void
+Qmgr::completeAllocNodeIdReq(Signal *signal)
+{
+  OpAllocNodeIdReq & op = opAllocNodeIdReq;
+
+  if (!op.m_tracker.done())
+  {
+    jam();
+    return;
+  }
+
+  const Uint32 requesterRef = op.m_req.senderRef;
+
+  if (op.m_connectCount !=
+      getNodeInfo(refToNode(requesterRef)).m_connectCount)
+  {
+    // the connection to the requesting node is not the one the request
+    // arrived on, so there is nobody to answer
+    jam();
+    return;
+  }
+
+  if (!op.m_tracker.hasRef())
+  {
+    jam();
+    AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
+    conf->senderRef = reference();
+    conf->senderData = op.m_req.senderData;
+    conf->nodeId = op.m_req.nodeId;
+    ndbassert(AllocNodeIdConf::SignalLength == 3);
+    sendSignal(requesterRef, GSN_ALLOC_NODEID_CONF, signal,
+               AllocNodeIdConf::SignalLength, JBB);
+    return;
+  }
+
+  jam();
+  AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
+  ref->senderRef = reference();
+  ref->senderData = op.m_req.senderData;
+  ref->nodeId = op.m_req.nodeId;
+  ref->errorCode = op.m_error;
+  ref->masterRef = numberToRef(QMGR, cpresident);
+  ndbassert(AllocNodeIdRef::SignalLength == 5);
+  sendSignal(requesterRef, GSN_ALLOC_NODEID_REF, signal,
+             AllocNodeIdRef::SignalLength, JBB);
+}
diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp
index b3a25eaa0da..cf9965b94e0 100644
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp
@@ -40,6 +40,7 @@
#include <signaldata/ManagementServer.hpp>
#include <signaldata/NFCompleteRep.hpp>
#include <signaldata/NodeFailRep.hpp>
+#include <signaldata/AllocNodeId.hpp>
#include <NdbSleep.h>
#include <EventLogger.hpp>
#include <DebuggerNames.hpp>
@@ -1712,6 +1713,88 @@ MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const
}
}
+/**
+ * Ask the data nodes whether node id free_node_id may be allocated.
+ *
+ * Sends ALLOC_NODEID_REQ to the QMGR block of a live data node and
+ * waits for the answer. A NotMaster or Busy refusal is retried at the
+ * node named in the reply's masterRef, or at another live data node
+ * if that node is not alive.
+ *
+ * @param free_node_id  node id to allocate
+ * @return 0 if the cluster confirmed the allocation,
+ *         NO_CONTACT_WITH_DB_NODES if no live data node was found,
+ *         SEND_OR_RECEIVE_FAILED if sending failed or an unexpected
+ *         signal arrived, otherwise the error code of the
+ *         ALLOC_NODEID_REF.
+ */
+int
+MgmtSrvr::alloc_node_id_req(Uint32 free_node_id)
+{
+  SignalSender ss(theFacade);
+  ss.lock(); // lock will be released on exit
+
+  SimpleSignal ssig;
+  AllocNodeIdReq* req = CAST_PTR(AllocNodeIdReq, ssig.getDataPtrSend());
+  ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_ALLOC_NODEID_REQ,
+           AllocNodeIdReq::SignalLength);
+
+  req->senderRef = ss.getOwnRef();
+  req->senderData = 19; // the senderData of the reply is not inspected below
+  req->nodeId = free_node_id;
+
+  int do_send = 1;
+  NodeId nodeId = 0; // 0: no target chosen, pick a live data node
+  while (1)
+  {
+    if (nodeId == 0)
+    {
+      bool next;
+      while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
+            theFacade->get_node_alive(nodeId) == false);
+      if (!next)
+        return NO_CONTACT_WITH_DB_NODES;
+      do_send = 1;
+    }
+    if (do_send)
+    {
+      if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
+        return SEND_OR_RECEIVE_FAILED;
+      }
+      do_send = 0;
+    }
+
+    SimpleSignal *signal = ss.waitFor();
+
+    int gsn = signal->readSignalNumber();
+    switch (gsn) {
+    case GSN_ALLOC_NODEID_CONF:
+    {
+      return 0;
+    }
+    case GSN_ALLOC_NODEID_REF:
+    {
+      const AllocNodeIdRef * const ref =
+        CAST_CONSTPTR(AllocNodeIdRef, signal->getDataPtr());
+      if (ref->errorCode == AllocNodeIdRef::NotMaster ||
+          ref->errorCode == AllocNodeIdRef::Busy)
+      {
+        do_send = 1;
+        nodeId = refToNode(ref->masterRef);
+        // Sending to a node that is not alive would fail and refuse
+        // the whole allocation; fall back to any live data node.
+        if (theFacade->get_node_alive(nodeId) == false)
+          nodeId = 0;
+        continue;
+      }
+      return ref->errorCode;
+    }
+    case GSN_NF_COMPLETEREP:
+    {
+      const NFCompleteRep * const rep =
+        CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
+#ifdef VM_TRACE
+      ndbout_c("Node %d fail completed", rep->failedNodeId);
+#endif
+      // the node we are waiting on has failed; choose another one
+      if (rep->failedNodeId == nodeId)
+        nodeId = 0;
+      continue;
+    }
+    case GSN_NODE_FAILREP:{
+      // ignore; NF_COMPLETEREP will follow
+      continue;
+    }
+    default:
+      report_unknown_signal(signal);
+      return SEND_OR_RECEIVE_FAILED;
+    }
+  }
+  return 0;
+}
+
bool
MgmtSrvr::alloc_node_id(NodeId * nodeId,
enum ndb_mgm_node_type type,
@@ -1836,6 +1919,39 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
}
NdbMutex_Unlock(m_configMutex);
+ // Before handing out the id, ask the cluster whether it accepts the
+ // allocation (only done when client_addr is set).
+ if (id_found && client_addr != 0)
+ {
+ int res = alloc_node_id_req(id_found);
+ unsigned save_id_found = id_found; // kept for the messages below
+ switch (res)
+ {
+ case 0:
+ // cluster confirmed the allocation
+ break;
+ case NO_CONTACT_WITH_DB_NODES:
+ // no live data node to ask; the id is handed out anyway
+ break;
+ default:
+ // cluster refused, or the request could not be completed
+ id_found = 0;
+ break;
+
+ }
+ if (id_found == 0)
+ {
+ char buf[128];
+ ndb_error_string(res, buf, sizeof(buf));
+ error_string.appfmt("Cluster refused allocation of id %d. Error: %d (%s).",
+ save_id_found, res, buf);
+ g_eventLogger.warning("Cluster refused allocation of id %d. "
+ "Connection from ip %s. "
+ "Returned error string \"%s\"", save_id_found,
+ inet_ntoa(((struct sockaddr_in *)(client_addr))->sin_addr),
+ error_string.c_str());
+ DBUG_RETURN(false);
+ }
+ }
+
if (id_found)
{
*nodeId= id_found;
diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp
index 0e2ebad8188..ec6ab47bc2a 100644
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp
@@ -506,7 +506,8 @@ private:
* @return -1 if block not found, otherwise block number
*/
int getBlockNumber(const BaseString &blockName);
-
+
+ int alloc_node_id_req(Uint32 free_node_id);
//**************************************************************************
int _blockNumber;
diff --git a/storage/ndb/src/ndbapi/ndberror.c b/storage/ndb/src/ndbapi/ndberror.c
index 389c1008536..6ea45fd0d60 100644
--- a/storage/ndb/src/ndbapi/ndberror.c
+++ b/storage/ndb/src/ndbapi/ndberror.c
@@ -81,6 +81,7 @@ static const char* empty_string = "";
* 1400 - SUMA
* 1500 - LGMAN
* 1600 - TSMAN
+ * 1700 - QMGR
* 4000 - API
* 4100 - ""
* 4200 - ""
@@ -450,6 +451,15 @@ ErrorBundle ErrorCodes[] = {
{ 1348, DMEC, AE, "Backup failed to allocate file record (check configuration)" },
{ 1349, DMEC, AE, "Backup failed to allocate attribute record (check configuration)" },
{ 1329, DMEC, AE, "Backup during software upgrade not supported" },
+
+ /**
+ * Node id allocation error codes
+ */
+
+ { 1700, DMEC, IE, "Undefined error" },
+ { 1701, DMEC, AE, "Node already reserved" },
+ { 1702, DMEC, AE, "Node already connected" },
+ { 1703, DMEC, AE, "Node failure handling not completed" },
/**
* Still uncategorized