summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlan Conway <aconway@apache.org>2012-11-09 21:45:06 +0000
committerAlan Conway <aconway@apache.org>2012-11-09 21:45:06 +0000
commit8ff1f8c1a0eb2cee06402544f6ccac6e57b1bcd2 (patch)
treeda906ba0e6424a556ef31b4a7e928304da116d68
parent464ae39a9a52becfb9b22fa2d5efc8624617bffa (diff)
downloadqpid-python-8ff1f8c1a0eb2cee06402544f6ccac6e57b1bcd2.tar.gz
Bug 874118 - HA Deadlock in backup broker after disconnecting from primary.
The backup broker was running ExchangeRegistry::for_each to clean up connections, but this holds the ExchangeRegistry lock and hence the deadlock. Now we copy a list of exchanges with for_each and work on it without the lock. The issue showed up for 0 timeouts only because the queue schedules non-0 timeouts to a separate timer thread. git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1407661 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--qpid/cpp/src/qpid/ha/BrokerReplicator.cpp23
-rw-r--r--qpid/cpp/src/qpid/ha/BrokerReplicator.h2
-rwxr-xr-xqpid/cpp/src/tests/ha_tests.py24
3 files changed, 28 insertions, 21 deletions
diff --git a/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp b/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp
index 11cc889ad6..45b5ee64c2 100644
--- a/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp
+++ b/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp
@@ -63,6 +63,7 @@ using qmf::org::apache::qpid::ha::EventMembersUpdate;
using qpid::broker::amqp_0_10::MessageTransfer;
using namespace framing;
using namespace std;
+using namespace boost;
using std::ostream;
using types::Variant;
using namespace broker;
@@ -790,9 +791,7 @@ bool BrokerReplicator::isBound(boost::shared_ptr<Queue>, const string* const, co
string BrokerReplicator::getType() const { return QPID_CONFIGURATION_REPLICATOR; }
-void BrokerReplicator::autoDeleteCheck(
- boost::shared_ptr<Exchange> ex, set<string>& result)
-{
+void BrokerReplicator::autoDeleteCheck(boost::shared_ptr<Exchange> ex) {
boost::shared_ptr<QueueReplicator> qr(boost::dynamic_pointer_cast<QueueReplicator>(ex));
if (!qr) return;
assert(qr);
@@ -802,8 +801,9 @@ void BrokerReplicator::autoDeleteCheck(
Queue::tryAutoDelete(broker, qr->getQueue(), remoteHost, userId);
}
else {
- // Mark for immediate deletion.
- result.insert(qr->getQueue()->getName());
+ // Delete immediately. Don't purge, the primary is gone so we need
+ // to reroute the deleted messages.
+ deleteQueue(qr->getQueue()->getName(), false);
}
}
}
@@ -812,13 +812,12 @@ void BrokerReplicator::disconnected() {
QPID_LOG(info, logPrefix << "Disconnected");
connection = 0;
// Clean up auto-delete queues
- set<string> deleteQueues;
- exchanges.eachExchange(boost::bind(&BrokerReplicator::autoDeleteCheck,
- this, _1, boost::ref(deleteQueues)));
- // Don't purge before deleting, the primary is gone so we need to
- // reroute the deleted messages.
- for_each(deleteQueues.begin(), deleteQueues.end(),
- boost::bind(&BrokerReplicator::deleteQueue, this, _1, false));
+ vector<boost::shared_ptr<Exchange> > collect;
+ // Make a copy so we can work outside the ExchangeRegistry lock
+ exchanges.eachExchange(
+ boost::bind(&vector<boost::shared_ptr<Exchange> >::push_back, ref(collect), _1));
+ for_each(collect.begin(), collect.end(),
+ boost::bind(&BrokerReplicator::autoDeleteCheck, this, _1));
}
}} // namespace broker
diff --git a/qpid/cpp/src/qpid/ha/BrokerReplicator.h b/qpid/cpp/src/qpid/ha/BrokerReplicator.h
index 1a86be63c3..9134163575 100644
--- a/qpid/cpp/src/qpid/ha/BrokerReplicator.h
+++ b/qpid/cpp/src/qpid/ha/BrokerReplicator.h
@@ -133,7 +133,7 @@ class BrokerReplicator : public broker::Exchange,
void deleteQueue(const std::string& name, bool purge=true);
void deleteExchange(const std::string& name);
- void autoDeleteCheck(boost::shared_ptr<broker::Exchange>, std::set<std::string>&);
+ void autoDeleteCheck(boost::shared_ptr<broker::Exchange>);
void disconnected();
std::string logPrefix;
diff --git a/qpid/cpp/src/tests/ha_tests.py b/qpid/cpp/src/tests/ha_tests.py
index 3b69e3de33..a7f276becb 100755
--- a/qpid/cpp/src/tests/ha_tests.py
+++ b/qpid/cpp/src/tests/ha_tests.py
@@ -753,11 +753,16 @@ acl deny all all
def test_auto_delete_timeout(self):
cluster = HaCluster(self, 2)
- s = cluster[0].connect().session().receiver("q;{create:always,node:{x-declare:{auto-delete:True,arguments:{'qpid.auto_delete_timeout':1}}}}")
- cluster[1].wait_queue("q")
+ # Test timeout
+ r1 = cluster[0].connect().session().receiver("q1;{create:always,node:{x-declare:{auto-delete:True,arguments:{'qpid.auto_delete_timeout':1}}}}")
+ # Test special case of timeout = 0
+ r0 = cluster[0].connect().session().receiver("q0;{create:always,node:{x-declare:{auto-delete:True,arguments:{'qpid.auto_delete_timeout':0}}}}")
+ cluster[1].wait_queue("q0")
+ cluster[1].wait_queue("q1")
cluster[0].kill()
- cluster[1].wait_queue("q") # Not timed out yet
- cluster[1].wait_no_queue("q") # Wait for timeout
+ cluster[1].wait_queue("q1") # Not timed out yet
+ cluster[1].wait_no_queue("q1", timeout=2) # Wait for timeout
+ cluster[1].wait_no_queue("q0", timeout=2)
def test_alt_exchange_dup(self):
"""QPID-4349: if a queue has an alterante exchange and is deleted the
@@ -817,10 +822,13 @@ acl deny all all
if class_name(m) == 'queueDeclare' and q_name(m) == qname: found = True
except Empty: pass
assert(found)
- verify_qmf_events("q1")
- cluster[1].wait_status("ready")
- cluster.kill(0)
- verify_qmf_events("q2")
+ try:
+ verify_qmf_events("q1")
+ cluster[1].wait_status("ready")
+ l = LogLevel(ERROR) # Hide expected WARNING log messages from failover.
+ cluster.kill(0)
+ verify_qmf_events("q2")
+ finally: l.restore()
def fairshare(msgs, limit, levels):
"""