diff options
author | Alan Conway <aconway@apache.org> | 2012-11-09 21:45:06 +0000 |
---|---|---|
committer | Alan Conway <aconway@apache.org> | 2012-11-09 21:45:06 +0000 |
commit | 8ff1f8c1a0eb2cee06402544f6ccac6e57b1bcd2 (patch) | |
tree | da906ba0e6424a556ef31b4a7e928304da116d68 | |
parent | 464ae39a9a52becfb9b22fa2d5efc8624617bffa (diff) | |
download | qpid-python-8ff1f8c1a0eb2cee06402544f6ccac6e57b1bcd2.tar.gz |
Bug 874118 - HA Deadlock in backup broker after disconnecting from primary.
The backup broker was running ExchangeRegistry::for_each to clean up
connections, but this holds the ExchangeRegistry lock and hence the deadlock.
Now we copy a list of exchanges with for_each and work on it without the lock.
The issue showed up for 0 timeouts only because the queue schedules non-0
timeouts to a separate timer thread.
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1407661 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | qpid/cpp/src/qpid/ha/BrokerReplicator.cpp | 23 | ||||
-rw-r--r-- | qpid/cpp/src/qpid/ha/BrokerReplicator.h | 2 | ||||
-rwxr-xr-x | qpid/cpp/src/tests/ha_tests.py | 24 |
3 files changed, 28 insertions, 21 deletions
diff --git a/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp b/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp index 11cc889ad6..45b5ee64c2 100644 --- a/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp +++ b/qpid/cpp/src/qpid/ha/BrokerReplicator.cpp @@ -63,6 +63,7 @@ using qmf::org::apache::qpid::ha::EventMembersUpdate; using qpid::broker::amqp_0_10::MessageTransfer; using namespace framing; using namespace std; +using namespace boost; using std::ostream; using types::Variant; using namespace broker; @@ -790,9 +791,7 @@ bool BrokerReplicator::isBound(boost::shared_ptr<Queue>, const string* const, co string BrokerReplicator::getType() const { return QPID_CONFIGURATION_REPLICATOR; } -void BrokerReplicator::autoDeleteCheck( - boost::shared_ptr<Exchange> ex, set<string>& result) -{ +void BrokerReplicator::autoDeleteCheck(boost::shared_ptr<Exchange> ex) { boost::shared_ptr<QueueReplicator> qr(boost::dynamic_pointer_cast<QueueReplicator>(ex)); if (!qr) return; assert(qr); @@ -802,8 +801,9 @@ void BrokerReplicator::autoDeleteCheck( Queue::tryAutoDelete(broker, qr->getQueue(), remoteHost, userId); } else { - // Mark for immediate deletion. - result.insert(qr->getQueue()->getName()); + // Delete immediately. Don't purge, the primary is gone so we need + // to reroute the deleted messages. + deleteQueue(qr->getQueue()->getName(), false); } } } @@ -812,13 +812,12 @@ void BrokerReplicator::disconnected() { QPID_LOG(info, logPrefix << "Disconnected"); connection = 0; // Clean up auto-delete queues - set<string> deleteQueues; - exchanges.eachExchange(boost::bind(&BrokerReplicator::autoDeleteCheck, - this, _1, boost::ref(deleteQueues))); - // Don't purge before deleting, the primary is gone so we need to - // reroute the deleted messages. - for_each(deleteQueues.begin(), deleteQueues.end(), - boost::bind(&BrokerReplicator::deleteQueue, this, _1, false)); + vector<boost::shared_ptr<Exchange> > collect; + // Make a copy so we can work outside the ExchangeRegistry lock + exchanges.eachExchange( + boost::bind(&vector<boost::shared_ptr<Exchange> >::push_back, ref(collect), _1)); + for_each(collect.begin(), collect.end(), + boost::bind(&BrokerReplicator::autoDeleteCheck, this, _1)); } }} // namespace broker diff --git a/qpid/cpp/src/qpid/ha/BrokerReplicator.h b/qpid/cpp/src/qpid/ha/BrokerReplicator.h index 1a86be63c3..9134163575 100644 --- a/qpid/cpp/src/qpid/ha/BrokerReplicator.h +++ b/qpid/cpp/src/qpid/ha/BrokerReplicator.h @@ -133,7 +133,7 @@ class BrokerReplicator : public broker::Exchange, void deleteQueue(const std::string& name, bool purge=true); void deleteExchange(const std::string& name); - void autoDeleteCheck(boost::shared_ptr<broker::Exchange>, std::set<std::string>&); + void autoDeleteCheck(boost::shared_ptr<broker::Exchange>); void disconnected(); std::string logPrefix; diff --git a/qpid/cpp/src/tests/ha_tests.py b/qpid/cpp/src/tests/ha_tests.py index 3b69e3de33..a7f276becb 100755 --- a/qpid/cpp/src/tests/ha_tests.py +++ b/qpid/cpp/src/tests/ha_tests.py @@ -753,11 +753,16 @@ acl deny all all def test_auto_delete_timeout(self): cluster = HaCluster(self, 2) - s = cluster[0].connect().session().receiver("q;{create:always,node:{x-declare:{auto-delete:True,arguments:{'qpid.auto_delete_timeout':1}}}}") - cluster[1].wait_queue("q") + # Test timeout + r1 = cluster[0].connect().session().receiver("q1;{create:always,node:{x-declare:{auto-delete:True,arguments:{'qpid.auto_delete_timeout':1}}}}") + # Test special case of timeout = 0 + r0 = cluster[0].connect().session().receiver("q0;{create:always,node:{x-declare:{auto-delete:True,arguments:{'qpid.auto_delete_timeout':0}}}}") + cluster[1].wait_queue("q0") + cluster[1].wait_queue("q1") cluster[0].kill() - cluster[1].wait_queue("q") # Not timed out yet - cluster[1].wait_no_queue("q") # Wait for timeout + cluster[1].wait_queue("q1") # Not timed out yet + cluster[1].wait_no_queue("q1", timeout=2) # Wait for timeout + cluster[1].wait_no_queue("q0", timeout=2) def test_alt_exchange_dup(self): """QPID-4349: if a queue has an alterante exchange and is deleted the @@ -817,10 +822,13 @@ acl deny all all if class_name(m) == 'queueDeclare' and q_name(m) == qname: found = True except Empty: pass assert(found) - verify_qmf_events("q1") - cluster[1].wait_status("ready") - cluster.kill(0) - verify_qmf_events("q2") + try: + verify_qmf_events("q1") + cluster[1].wait_status("ready") + l = LogLevel(ERROR) # Hide expected WARNING log messages from failover. + cluster.kill(0) + verify_qmf_events("q2") + finally: l.restore() def fairshare(msgs, limit, levels): """ |