diff options
author | Alan Conway <aconway@apache.org> | 2015-02-25 15:49:53 +0000 |
---|---|---|
committer | Alan Conway <aconway@apache.org> | 2015-02-25 15:49:53 +0000 |
commit | 8219f65028e4ce51962ab373cec8f77880e2e97b (patch) | |
tree | 0a48abe0b772ad6ae9ae6be261f272568e20e247 | |
parent | 911b6feb851f236678c9e3fbd9a6e2b4dd1e00a3 (diff) | |
download | qpid-python-8219f65028e4ce51962ab373cec8f77880e2e97b.tar.gz |
QPID-6413: Sporadic failure of HA tests causd by maxNegotiateTimeout
Increased maxNegotiateTimeout to the default (10 seconds). A smaller value speeds up
detection of non-running brokers on remote hosts, but this is not necessary for the tests.
Increased some other test timeouts and added some improved error reporting.
The occasional long (> 1 second) connection delays are caused by Cyrus SASL authentication.
Not clear why this takes so long, but that is a separate issue. Here's a client log excerpt
showing the delay.
2015-02-25 08:29:37.461299895 [Network] trace RECV [[127.0.0.1:34247-127.0.0.1:45983]]: Frame[BEbe; channel=0; {ConnectionStartBody: server-properties={qpid.federation_tag:V2:36:str16(77800bff-a176-46c1-917a-32f136dee650)}; mechanisms=str16{V2:9:str16(ANONYMOUS), V2:5:str16(PLAIN)}; locales=str16{V2:5:str16(en_US)}; }]
2015-02-25 08:29:37.463116303 [Security] debug CyrusSasl::start(ANONYMOUS PLAIN)
(Note delay > 1 sec here)
2015-02-25 08:29:38.839793753 [Security] debug min_ssf: 0, max_ssf: 256
2015-02-25 08:29:38.839851781 [Security] debug CyrusSasl::start(ANONYMOUS PLAIN): selected ANONYMOUS response: 'anonymous@wallace'
2015-02-25 08:29:38.839963162 [Client] warning Connection [127.0.0.1:34247-127.0.0.1:45983] closed
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1662247 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp | 11 | ||||
-rwxr-xr-x | qpid/cpp/src/tests/ha_test.py | 22 |
2 files changed, 18 insertions, 15 deletions
diff --git a/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp b/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp index 7be625a1a3..2037ba38ab 100644 --- a/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp +++ b/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp @@ -36,17 +36,20 @@ namespace sys { struct ProtocolTimeoutTask : public sys::TimerTask { AsynchIOHandler& handler; std::string id; + Duration timeout; - ProtocolTimeoutTask(const std::string& i, const Duration& timeout, AsynchIOHandler& h) : - TimerTask(timeout, "ProtocolTimeout"), + ProtocolTimeoutTask(const std::string& i, const Duration& timeout_, AsynchIOHandler& h) : + TimerTask(timeout_, "ProtocolTimeout"), handler(h), - id(i) + id(i), + timeout(timeout_) {} void fire() { // If this fires it means that we didn't negotiate the connection in the timeout period // Schedule closing the connection for the io thread - QPID_LOG(error, "Connection " << id << " No protocol received closing"); + QPID_LOG(error, "Connection " << id << " No protocol received after " << timeout + << ", closing"); handler.abort(); } }; diff --git a/qpid/cpp/src/tests/ha_test.py b/qpid/cpp/src/tests/ha_test.py index 40ea3854c9..237216e208 100755 --- a/qpid/cpp/src/tests/ha_test.py +++ b/qpid/cpp/src/tests/ha_test.py @@ -129,9 +129,6 @@ class HaBroker(Broker): args += ["--load-module", BrokerTest.ha_lib, # Non-standard settings for faster tests. "--link-maintenance-interval=0.1", - # Heartbeat and negotiate time are needed so that a broker wont - # stall on an address that doesn't currently have a broker running. - "--max-negotiate-time=1000", "--ha-cluster=%s"%ha_cluster] # Add default --log-enable arguments unless args already has --log arguments. if not [l for l in args if l.startswith("--log")]: @@ -195,23 +192,26 @@ acl allow all all def ha_status(self): return self.qmf().status - def wait_status(self, status, timeout=5): + def wait_status(self, status, timeout=10): + def try_get_status(): self._status = "<unknown>" - # Ignore ConnectionError, the broker may not be up yet. try: self._status = self.ha_status() - return self._status == status; - except qm.ConnectionError: return False + except qm.ConnectionError, e: + # Record the error but don't raise, the broker may not be up yet. + self._status = "%s: %s" % (type(e).__name__, e) + return self._status == status; assert retry(try_get_status, timeout=timeout), "%s expected=%r, actual=%r"%( self, status, self._status) - def wait_queue(self, queue, timeout=1, msg="wait_queue"): + def wait_queue(self, queue, timeout=10, msg="wait_queue"): """ Wait for queue to be visible via QMF""" agent = self.agent - assert retry(lambda: agent.getQueue(queue) is not None, timeout=timeout), msg+"queue %s not present"%queue + assert retry(lambda: agent.getQueue(queue) is not None, timeout=timeout), \ + "%s queue %s not present" % (msg, queue) - def wait_no_queue(self, queue, timeout=1, msg="wait_no_queue"): + def wait_no_queue(self, queue, timeout=10, msg="wait_no_queue"): """ Wait for queue to be invisible via QMF""" agent = self.agent assert retry(lambda: agent.getQueue(queue) is None, timeout=timeout), "%s: queue %s still present"%(msg,queue) @@ -325,7 +325,7 @@ class HaCluster(object): ha_port = self._ports[i] b = HaBroker(ha_port.test, ha_port, brokers_url=self.url, name=name, args=args, **self.kwargs) - b.ready(timeout=5) + b.ready(timeout=10) return b def start(self): |