summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlan Conway <aconway@apache.org>2012-03-08 21:47:04 +0000
committerAlan Conway <aconway@apache.org>2012-03-08 21:47:04 +0000
commit6c101f2b7ca22619dc4c0874f918c4654f6bbc89 (patch)
tree33d0a44922f0765c360ba8ee12c1b32aeca24098
parent4c9383f086e36d61656bff975bc9b0188c08b85f (diff)
downloadqpid-python-6c101f2b7ca22619dc4c0874f918c4654f6bbc89.tar.gz
QPID-3603: rgmanager integration for new HA cluster
- service start script qpidd-primary, called by rgmanager to promote a broker. - example cluster.conf - minor changes to qpid-ha git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1298602 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--qpid/cpp/etc/Makefile.am2
-rw-r--r--qpid/cpp/etc/cluster.conf-example.xml77
-rwxr-xr-xqpid/cpp/etc/qpidd-primary102
-rwxr-xr-xqpid/cpp/src/tests/cluster_failover18
-rwxr-xr-xqpid/tools/src/py/qpid-ha27
5 files changed, 208 insertions, 18 deletions
diff --git a/qpid/cpp/etc/Makefile.am b/qpid/cpp/etc/Makefile.am
index 1e4db561a7..ab7c62d95d 100644
--- a/qpid/cpp/etc/Makefile.am
+++ b/qpid/cpp/etc/Makefile.am
@@ -20,7 +20,7 @@ SASL_CONF = sasl2/qpidd.conf
EXTRA_DIST = \
$(SASL_CONF) \
- qpidd qpidd.conf qpidc.conf CMakeLists.txt
+ qpidd qpidd-primary qpidd.conf qpidc.conf CMakeLists.txt
confdir=$(sysconfdir)/qpid
nobase_conf_DATA=\
diff --git a/qpid/cpp/etc/cluster.conf-example.xml b/qpid/cpp/etc/cluster.conf-example.xml
new file mode 100644
index 0000000000..555843fd8e
--- /dev/null
+++ b/qpid/cpp/etc/cluster.conf-example.xml
@@ -0,0 +1,77 @@
+<?xml version="1.0"?>
+<!--
+This is an example of a cluster.conf file to run qpidd under rgmanager.
+This example assumes a 3 node cluster, with nodes named mrg32, mrg34 and mrg35.
+There is no fencing in this configuration.
+-->
+
+<cluster alias="qpid-hot-standby" config_version="4" name="qpid-hot-standby">
+ <!-- The cluster has 3 nodes. Each has a unique nodeid and one vote for quorum. -->
+ <clusternodes>
+ <clusternode name="mrg32" nodeid="1">
+ <fence/>
+ </clusternode>
+ <clusternode name="mrg34" nodeid="2">
+ <fence/>
+ </clusternode>
+ <clusternode name="mrg35" nodeid="3">
+ <fence/>
+ </clusternode>
+ </clusternodes>
+ <cman/>
+ <!-- Optionally you can specify logging; this setting is the most verbose. -->
+ <rm log_level="7" log_facility="daemon">
+
+ <!--
+ There is a failoverdomain for each node containing just that node.
+ This lets us stipulate that the qpidd service should always run on all nodes.
+ -->
+ <failoverdomains>
+ <failoverdomain name="mrg32-domain" restricted="1">
+ <failoverdomainnode name="mrg32"/>
+ </failoverdomain>
+ <failoverdomain name="mrg34-domain" restricted="1">
+ <failoverdomainnode name="mrg34"/>
+ </failoverdomain>
+ <failoverdomain name="mrg35-domain" restricted="1">
+ <failoverdomainnode name="mrg35"/>
+ </failoverdomain>
+ </failoverdomains>
+
+ <resources>
+ <!-- This script starts a qpidd broker acting as a backup. -->
+ <script file="/etc/init.d/qpidd" name="qpidd"/>
+
+ <!-- This script promotes the qpidd broker on this node to primary. -->
+ <script file="/etc/init.d/qpidd-primary" name="qpidd-primary"/>
+
+ <!-- This is a virtual IP address for broker replication traffic. -->
+ <ip address="20.0.10.200" monitor_link="1"/>
+
+ <!-- This is a virtual IP address on a separate network for client traffic. -->
+ <ip address="20.0.20.200" monitor_link="1"/>
+ </resources>
+
+ <!-- There is a qpidd service on each node, it should be restarted if it fails. -->
+ <service name="mrg32-qpidd-service" domain="mrg32-domain" recovery="restart">
+ <script ref="qpidd"/>
+ </service>
+ <service name="mrg34-qpidd-service" domain="mrg34-domain" recovery="restart">
+ <script ref="qpidd"/>
+ </service>
+ <service name="mrg35-qpidd-service" domain="mrg35-domain" recovery="restart">
+ <script ref="qpidd"/>
+ </service>
+
+ <!-- There should always be a single qpidd-primary service, it can run on any node. -->
+ <service name="qpidd-primary-service" autostart="1" exclusive="0" recovery="relocate">
+ <script ref="qpidd-primary"/>
+ <!-- The primary has the IP addresses for brokers and clients. -->
+ <ip ref="20.0.10.200"/>
+ <ip ref="20.0.20.200"/>
+ </service>
+ </rm>
+ <fencedevices/>
+ <fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
+</cluster>
+
diff --git a/qpid/cpp/etc/qpidd-primary b/qpid/cpp/etc/qpidd-primary
new file mode 100755
index 0000000000..2bbc22129d
--- /dev/null
+++ b/qpid/cpp/etc/qpidd-primary
@@ -0,0 +1,102 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+# qpidd-primary Script to promote a running qpidd broker to cluster primary.
+#
+
+### BEGIN INIT INFO
+# Provides: qpidd-primary
+# Required-Start: $qpidd
+# Required-Stop: $qpidd
+# Default-Start:
+# Default-Stop: 0 1 2 3 4 5 6
+# Short-Description: promote qpidd to cluster primary
+# Description: Qpidd can be run in an active/passive cluster. Promote a running qpidd to primary.
+### END INIT INFO
+
+# chkconfig: - 85 15
+# description: Qpidd can be run in an active/passive cluster. Promote a running qpidd to primary.
+# processname: qpidd
+
+prog=qpidd
+
+# Source function library.
+. /etc/rc.d/init.d/functions
+
+if [ -f /etc/sysconfig/$prog ] ; then
+ . /etc/sysconfig/$prog
+fi
+
+# The following variables can be overridden in /etc/sysconfig/$prog
+[[ $QPID_PORT ]] || QPID_PORT=5672
+[[ $QPID_HA ]] || QPID_HA=/usr/bin/qpid-ha
+export QPID_PORT
+
+RETVAL=0
+
+# Warn if the qpid-ha binary is missing or not executable (the script still continues).
+if [[ !(-x $QPID_HA) ]]; then
+ echo "qpid-ha executable not found or not executable"
+fi
+
+status() {
+ if $QPID_HA -b localhost:$QPID_PORT status --expect=primary ; then
+ echo "qpidd is primary"
+ else
+ echo "qpidd is not primary"
+ return 1
+ fi
+}
+
+start() {
+ service qpidd start
+ echo -n $"Promoting qpid daemon to cluster primary: "
+ $QPID_HA -b localhost:$QPID_PORT promote
+ [ "$?" -eq 0 ] && success || failure
+}
+
+stop() {
+ service qpidd stop
+}
+
+reload() {
+ echo 1>&2 $"$0: reload not supported"
+ exit 3
+}
+
+restart() {
+ service qpidd restart && start
+}
+
+# Dispatch on the requested action; the script exits with that action's status in RETVAL.
+case "$1" in
+ start|stop|status|restart|reload)
+ $1
+ RETVAL=$?
+ ;;
+ force-reload)
+ restart; RETVAL=$? # reload is unsupported, so restart instead and report its status
+ ;;
+ *)
+ echo 1>&2 $"Usage: $0 {start|stop|status|restart|force-reload}"
+ exit 2
+esac
+
+exit $RETVAL
diff --git a/qpid/cpp/src/tests/cluster_failover b/qpid/cpp/src/tests/cluster_failover
new file mode 100755
index 0000000000..c978ee910c
--- /dev/null
+++ b/qpid/cpp/src/tests/cluster_failover
@@ -0,0 +1,18 @@
+#!/bin/sh
+# A simple manual failover test, sends a stream of numbered messages.
+# You can kill the connected broker and verify that the client reconnects
+# and no messages are lost.
+
+URL=$1
+test -n "$URL" || { echo Usage: $0 URL ; exit 1; }
+SEND=$(mktemp /tmp/send.XXXXXXXXXX)
+RECV=$(mktemp /tmp/recv.XXXXXXXXXX)
+echo $SEND $RECV
+
+seq 1000000 > $SEND
+
+qpid-receive -f -a 'q;{create:always}' -b $URL --connection-options "{reconnect:true}" | tee $RECV &
+
+qpid-send -a 'q;{create:always}' -b $URL --connection-options "{reconnect:true}" --send-rate 10 --content-stdin < $SEND &
+
+wait
diff --git a/qpid/tools/src/py/qpid-ha b/qpid/tools/src/py/qpid-ha
index 5c757f3c6c..ac35e3ed2c 100755
--- a/qpid/tools/src/py/qpid-ha
+++ b/qpid/tools/src/py/qpid-ha
@@ -67,26 +67,19 @@ class PromoteCmd(Command):
qmf_broker._method("promote", {}, HA_BROKER)
PromoteCmd()
-class ReadyCmd(Command):
+class StatusCmd(Command):
def __init__(self):
- Command.__init__(self, "ready", "Test if a backup broker is ready.\nReturn 0 if broker is a ready backup, non-0 otherwise.")
+ Command.__init__(self, "status", "Print HA status")
self.op.add_option(
- "--wait", type="int", metavar="<seconds>", default=None,
- help="Wait up to <seconds> for broker to be ready. 0 means wait forever.")
+ "--expect", type="string", metavar="<status>",
+ help="Don't print status but return 0 if it matches <status>, 1 otherwise")
def do_execute(self, qmf_broker, ha_broker, opts, args):
- if (ha_broker.status == "backup"): return
- if (ha_broker.status != "catch-up"):
- raise Exception("Broker is not a backup, status is '%s'"%ha_broker.status)
- if (opts.wait is None): return 1
- delay = 0.1
- timeout = time.time() + opts.wait
- while opts.wait == 0 or time.time() < timeout:
- time.sleep(delay)
- delay = min(2*delay, 1)
- ha_broker = qmf_broker.getHaBroker()
- if (ha_broker.status == "backup"): return
- return 1
-ReadyCmd()
+ if opts.expect:
+ if opts.expect != ha_broker.status: return 1
+ else:
+ print ha_broker.status
+ return 0
+StatusCmd()
class ReplicateCmd(Command):
def __init__(self):