author    Alan Conway <aconway@apache.org>  2012-08-07 15:50:58 +0000
committer Alan Conway <aconway@apache.org>  2012-08-07 15:50:58 +0000
commit    c80fa92b3eb2ddd298d9e52bfad8cdbf5b69f230 (patch)
tree      d8d6e579c441eeb09e1e97f928b1ed7ed2f412d1 /qpid/cpp/src
parent    3d386e1859cd15857ec1a5e3ed7c8ee917de7900 (diff)
NO-JIRA: HA only expect READY backups in recovery.
Don't wait for un-ready backups to become ready during recovery: they were not ready before the failure, so there is no reason to wait for them to become ready after it. Waiting only for READY backups gives us equivalent safety to before the failure. Minor test & log improvements.

git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1370325 13f79535-47bb-0310-9956-ffa450edef68
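The idea behind the Membership.cpp change below can be shown in a minimal, self-contained C++ sketch. The types here (BrokerStatus, BrokerInfo, otherReadyBackups) are simplified stand-ins for the real qpid::ha classes, not the actual API: a recovering primary builds its expected-backup set only from peers whose last known status was READY.

    // Minimal sketch of the READY-only filter; simplified stand-in types,
    // not the real qpid::ha classes.
    #include <iostream>
    #include <map>
    #include <set>
    #include <string>

    enum BrokerStatus { JOINING, CATCHUP, READY, RECOVERING, ACTIVE };

    struct BrokerInfo {
        std::string systemId;
        BrokerStatus status;
    };

    // Return the IDs of all READY backups other than self
    // (cf. Membership::otherBackups in the diff below).
    std::set<std::string> otherReadyBackups(
        const std::map<std::string, BrokerInfo>& brokers, const std::string& self)
    {
        std::set<std::string> result;
        for (std::map<std::string, BrokerInfo>::const_iterator i = brokers.begin();
             i != brokers.end(); ++i)
        {
            if (i->second.status == READY && i->second.systemId != self)
                result.insert(i->second.systemId);
        }
        return result;
    }

    static BrokerInfo mk(const std::string& id, BrokerStatus s) {
        BrokerInfo b; b.systemId = id; b.status = s; return b;
    }

    int main() {
        std::map<std::string, BrokerInfo> brokers;
        brokers["a"] = mk("a", ACTIVE);   // self: the recovering primary
        brokers["b"] = mk("b", READY);    // ready before the failure: expected back
        brokers["c"] = mk("c", CATCHUP);  // still catching up: not expected
        std::set<std::string> expected = otherReadyBackups(brokers, "a");
        std::cout << "expected backups: " << expected.size() << std::endl; // prints 1
        return 0;
    }

A backup that was still catching up at the time of the failure never held a complete copy of the data, so excluding it from the expected set loses nothing relative to the pre-failure state.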
Diffstat (limited to 'qpid/cpp/src')
-rw-r--r--  qpid/cpp/src/qpid/ha/Membership.cpp  | 2
-rw-r--r--  qpid/cpp/src/qpid/ha/Membership.h    | 2
-rw-r--r--  qpid/cpp/src/qpid/ha/Primary.cpp     | 2
-rwxr-xr-x  qpid/cpp/src/tests/ha_tests.py       | 8
4 files changed, 8 insertions, 6 deletions
diff --git a/qpid/cpp/src/qpid/ha/Membership.cpp b/qpid/cpp/src/qpid/ha/Membership.cpp
index cc2906dd8f..74580f9b1e 100644
--- a/qpid/cpp/src/qpid/ha/Membership.cpp
+++ b/qpid/cpp/src/qpid/ha/Membership.cpp
@@ -66,7 +66,7 @@ types::Variant::List Membership::asList() const {
BrokerInfo::Set Membership::otherBackups() const {
BrokerInfo::Set result;
for (BrokerInfo::Map::const_iterator i = brokers.begin(); i != brokers.end(); ++i)
- if (isBackup(i->second.getStatus()) && i->second.getSystemId() != self)
+ if (i->second.getStatus() == READY && i->second.getSystemId() != self)
result.insert(i->second);
return result;
}
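Note the semantic narrowing here: isBackup() matches any backup state (joining, catching up, or ready), whereas comparing against READY limits otherBackups() to peers that had fully caught up, which is exactly the set a recovering primary should wait for.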
diff --git a/qpid/cpp/src/qpid/ha/Membership.h b/qpid/cpp/src/qpid/ha/Membership.h
index 3bd8653a64..8406dccd5d 100644
--- a/qpid/cpp/src/qpid/ha/Membership.h
+++ b/qpid/cpp/src/qpid/ha/Membership.h
@@ -47,7 +47,7 @@ class Membership
void add(const BrokerInfo& b);
void remove(const types::Uuid& id);
bool contains(const types::Uuid& id);
- /** Return IDs of all backups other than self */
+ /** Return IDs of all READY backups other than self */
BrokerInfo::Set otherBackups() const;
void assign(const types::Variant::List&);
diff --git a/qpid/cpp/src/qpid/ha/Primary.cpp b/qpid/cpp/src/qpid/ha/Primary.cpp
index 45a0e246f3..4462d91062 100644
--- a/qpid/cpp/src/qpid/ha/Primary.cpp
+++ b/qpid/cpp/src/qpid/ha/Primary.cpp
@@ -201,6 +201,7 @@ void Primary::opened(broker::Connection& connection) {
Mutex::ScopedLock l(lock);
BackupMap::iterator i = backups.find(info.getSystemId());
if (i == backups.end()) {
+ QPID_LOG(debug, logPrefix << "New backup connected: " << info);
boost::shared_ptr<RemoteBackup> backup(
new RemoteBackup(info, haBroker.getReplicationTest(), true));
{
@@ -209,7 +210,6 @@ void Primary::opened(broker::Connection& connection) {
backup->setInitialQueues(haBroker.getBroker().getQueues(), false);
}
backups[info.getSystemId()] = backup;
- QPID_LOG(debug, logPrefix << "New backup connected: " << info);
}
else {
QPID_LOG(debug, logPrefix << "Known backup connected: " << info);
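The Primary.cpp hunks are one of the "minor log improvements" from the commit message: the "New backup connected" message moves ahead of the RemoteBackup construction and initial-queue setup, so the connection is logged before any per-backup setup messages and the log reflects the actual order of events.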
diff --git a/qpid/cpp/src/tests/ha_tests.py b/qpid/cpp/src/tests/ha_tests.py
index d25281eed5..246b0ed423 100755
--- a/qpid/cpp/src/tests/ha_tests.py
+++ b/qpid/cpp/src/tests/ha_tests.py
@@ -111,9 +111,11 @@ class HaBroker(Broker):
def wait_status(self, status):
def try_get_status():
# Ignore ConnectionError, the broker may not be up yet.
- try: return self.ha_status() == status;
+ try:
+ self._status = self.ha_status()
+ return self._status == status;
except ConnectionError: return False
- assert retry(try_get_status, timeout=20), "%s status != %r"%(self, status)
+ assert retry(try_get_status, timeout=20), "%s %r != %r"%(self, self._status, status)
# FIXME aconway 2012-05-01: do direct python call to qpid-config code.
def qpid_config(self, args):
@@ -963,7 +965,7 @@ class RecoveryTests(BrokerTest):
"""
cluster = HaCluster(self, 3, args=["--ha-backup-timeout=0.5"]);
cluster[0].wait_status("active") # Primary ready
- for b in cluster[1:4]: b.wait_status("ready") # Backups ready
+ for b in cluster[1:3]: b.wait_status("ready") # Backups ready
for i in [0,1]: cluster.kill(i, False)
cluster[2].promote() # New primary, backups will be 1 and 2
cluster[2].wait_status("recovering")
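Two test tweaks round out the change: wait_status() now records the last status it observed, so a timeout reports the actual status seen rather than just "status !="; and the backup wait iterates cluster[1:3], matching the two backups in a three-broker cluster (the old cluster[1:4] happened to yield the same two elements only because Python slices clamp out-of-range bounds).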