summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlan Conway <aconway@apache.org>2015-02-26 21:55:47 +0000
committerAlan Conway <aconway@apache.org>2015-02-26 21:55:47 +0000
commit46abec8a404f17b2d2f2ab18f3d67ca7ceef6165 (patch)
treec0c945a022deb08073c7f98c4ed4e60750922d2e
parent50a8c4aa1d572ad9fbfebda5e46457c6b10164c0 (diff)
downloadqpid-python-46abec8a404f17b2d2f2ab18f3d67ca7ceef6165.tar.gz
QPID-6413: Sporadic failure of HA tests causd by maxNegotiateTimeout
Increased maxNegotiateTimeout to the default (10 seconds). A smaller value speeds up detection of non-running brokers on remote hosts, but this is not necessary for the tests. Increased some other test timeouts and added some improved error reporting. The occasional long (> 1 second) connection delays are caused by Cyrus SASL authentication. Not clear why this takes so long, but that is a separate issue. Here's a client log excerpt showing the delay. 2015-02-25 08:29:37.461299895 [Network] trace RECV [[127.0.0.1:34247-127.0.0.1:45983]]: Frame[BEbe; channel=0; {ConnectionStartBody: server-properties={qpid.federation_tag:V2:36:str16(77800bff-a176-46c1-917a-32f136dee650)}; mechanisms=str16{V2:9:str16(ANONYMOUS), V2:5:str16(PLAIN)}; locales=str16{V2:5:str16(en_US)}; }] 2015-02-25 08:29:37.463116303 [Security] debug CyrusSasl::start(ANONYMOUS PLAIN) (Note delay > 1 sec here) 2015-02-25 08:29:38.839793753 [Security] debug min_ssf: 0, max_ssf: 256 2015-02-25 08:29:38.839851781 [Security] debug CyrusSasl::start(ANONYMOUS PLAIN): selected ANONYMOUS response: 'anonymous@wallace' 2015-02-25 08:29:38.839963162 [Client] warning Connection [127.0.0.1:34247-127.0.0.1:45983] closed git-svn-id: https://svn.apache.org/repos/asf/qpid/branches/0.32@1662578 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp11
-rwxr-xr-xqpid/cpp/src/tests/ha_test.py22
2 files changed, 18 insertions, 15 deletions
diff --git a/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp b/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp
index 7be625a1a3..2037ba38ab 100644
--- a/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp
+++ b/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp
@@ -36,17 +36,20 @@ namespace sys {
struct ProtocolTimeoutTask : public sys::TimerTask {
AsynchIOHandler& handler;
std::string id;
+ Duration timeout;
- ProtocolTimeoutTask(const std::string& i, const Duration& timeout, AsynchIOHandler& h) :
- TimerTask(timeout, "ProtocolTimeout"),
+ ProtocolTimeoutTask(const std::string& i, const Duration& timeout_, AsynchIOHandler& h) :
+ TimerTask(timeout_, "ProtocolTimeout"),
handler(h),
- id(i)
+ id(i),
+ timeout(timeout_)
{}
void fire() {
// If this fires it means that we didn't negotiate the connection in the timeout period
// Schedule closing the connection for the io thread
- QPID_LOG(error, "Connection " << id << " No protocol received closing");
+ QPID_LOG(error, "Connection " << id << " No protocol received after " << timeout
+ << ", closing");
handler.abort();
}
};
diff --git a/qpid/cpp/src/tests/ha_test.py b/qpid/cpp/src/tests/ha_test.py
index 40ea3854c9..237216e208 100755
--- a/qpid/cpp/src/tests/ha_test.py
+++ b/qpid/cpp/src/tests/ha_test.py
@@ -129,9 +129,6 @@ class HaBroker(Broker):
args += ["--load-module", BrokerTest.ha_lib,
# Non-standard settings for faster tests.
"--link-maintenance-interval=0.1",
- # Heartbeat and negotiate time are needed so that a broker wont
- # stall on an address that doesn't currently have a broker running.
- "--max-negotiate-time=1000",
"--ha-cluster=%s"%ha_cluster]
# Add default --log-enable arguments unless args already has --log arguments.
if not [l for l in args if l.startswith("--log")]:
@@ -195,23 +192,26 @@ acl allow all all
def ha_status(self): return self.qmf().status
- def wait_status(self, status, timeout=5):
+ def wait_status(self, status, timeout=10):
+
def try_get_status():
self._status = "<unknown>"
- # Ignore ConnectionError, the broker may not be up yet.
try:
self._status = self.ha_status()
- return self._status == status;
- except qm.ConnectionError: return False
+ except qm.ConnectionError, e:
+ # Record the error but don't raise, the broker may not be up yet.
+ self._status = "%s: %s" % (type(e).__name__, e)
+ return self._status == status;
assert retry(try_get_status, timeout=timeout), "%s expected=%r, actual=%r"%(
self, status, self._status)
- def wait_queue(self, queue, timeout=1, msg="wait_queue"):
+ def wait_queue(self, queue, timeout=10, msg="wait_queue"):
""" Wait for queue to be visible via QMF"""
agent = self.agent
- assert retry(lambda: agent.getQueue(queue) is not None, timeout=timeout), msg+"queue %s not present"%queue
+ assert retry(lambda: agent.getQueue(queue) is not None, timeout=timeout), \
+ "%s queue %s not present" % (msg, queue)
- def wait_no_queue(self, queue, timeout=1, msg="wait_no_queue"):
+ def wait_no_queue(self, queue, timeout=10, msg="wait_no_queue"):
""" Wait for queue to be invisible via QMF"""
agent = self.agent
assert retry(lambda: agent.getQueue(queue) is None, timeout=timeout), "%s: queue %s still present"%(msg,queue)
@@ -325,7 +325,7 @@ class HaCluster(object):
ha_port = self._ports[i]
b = HaBroker(ha_port.test, ha_port, brokers_url=self.url, name=name,
args=args, **self.kwargs)
- b.ready(timeout=5)
+ b.ready(timeout=10)
return b
def start(self):