summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClay Gerrard <clay.gerrard@gmail.com>2016-10-11 13:23:11 -0700
committerOndřej Nový <ondrej.novy@firma.seznam.cz>2016-11-18 12:23:15 +0100
commit1f07ccb9bc015f030b5a50d0bf8d63423c7af029 (patch)
treea04ff93e95772d1b2ca00f09c6dab3c8b0a23b4b
parent7c786381c8ead2595ab34a61ac21a4118ed5abc7 (diff)
downloadswift-1f07ccb9bc015f030b5a50d0bf8d63423c7af029.tar.gz
Fix signal handling for daemons with InternalClient
The intentional use of "bare except" handling in catch_errors and some daemons to prevent propagation on unexpected errors that do not inherit from Exception (like eventlet.Timeout) or even BaseException (like old-style classes) has the side effect of spuriously "handling" *expected* errors like when a signal handler raises SystemExit. The signal handler installed in our Daemon is intended to ensure first that the entire process group and any forked processes (like rsync's) receive the SIGTERM signal and also that the process itself terminates. The use of sys.exit was not a concious grandiose plans for graceful shutdown (like the running[0] = False trick that wsgi server parent process do) - the desired behavior for SIGTERM is to stop - hard. This change ensures the original goals and intentions of our signal handler are fulfilled without the undesirable side effect that can cause our daemons to confusingly log an expected message to stop as an unexpected error, and start ignoring additional SIGTERM messages; forcing our kind operators to resort to brutal process murder. Closes-Bug: #1489209 Change-Id: I9d2886611f6db2498cd6a8f81a58f2a611f40905 (cherry picked from commit c2ce92a)
-rw-r--r--swift/common/daemon.py4
-rw-r--r--test/probe/test_signals.py (renamed from test/probe/test_wsgi_servers.py)50
-rw-r--r--test/unit/common/test_daemon.py31
3 files changed, 76 insertions, 9 deletions
diff --git a/swift/common/daemon.py b/swift/common/daemon.py
index 9a1402431..7e3007518 100644
--- a/swift/common/daemon.py
+++ b/swift/common/daemon.py
@@ -14,7 +14,6 @@
# limitations under the License.
import os
-import sys
import time
import signal
from re import sub
@@ -46,9 +45,10 @@ class Daemon(object):
utils.capture_stdio(self.logger, **kwargs)
def kill_children(*args):
+ self.logger.info('SIGTERM received')
signal.signal(signal.SIGTERM, signal.SIG_IGN)
os.killpg(0, signal.SIGTERM)
- sys.exit()
+ os._exit(0)
signal.signal(signal.SIGTERM, kill_children)
if once:
diff --git a/test/probe/test_wsgi_servers.py b/test/probe/test_signals.py
index 46175cf45..ea475c0d3 100644
--- a/test/probe/test_wsgi_servers.py
+++ b/test/probe/test_signals.py
@@ -17,6 +17,9 @@
import unittest
import random
+from contextlib import contextmanager
+
+import eventlet
from six.moves import http_client as httplib
@@ -101,5 +104,52 @@ class TestWSGIServerProcessHandling(unittest.TestCase):
self._check_reload(server, node['ip'], node['port'])
+@contextmanager
+def spawn_services(ip_ports, timeout=10):
+ q = eventlet.Queue()
+
+ def service(sock):
+ try:
+ conn, address = sock.accept()
+ q.put(address)
+ eventlet.sleep(timeout)
+ conn.close()
+ finally:
+ sock.close()
+
+ pool = eventlet.GreenPool()
+ for ip, port in ip_ports:
+ sock = eventlet.listen((ip, port))
+ pool.spawn(service, sock)
+
+ try:
+ yield q
+ finally:
+ for gt in list(pool.coroutines_running):
+ gt.kill()
+
+
+class TestHungDaemon(unittest.TestCase):
+
+ def setUp(self):
+ resetswift()
+ self.ip_ports = [
+ (dev['ip'], dev['port'])
+ for dev in Ring('/etc/swift', ring_name='account').devs
+ if dev
+ ]
+
+ def test_main(self):
+ reconciler = Manager(['container-reconciler'])
+ with spawn_services(self.ip_ports) as q:
+ reconciler.start()
+ # wait for the reconciler to connect
+ q.get()
+ # once it's hung in our connection - send it sig term
+ print('Attempting to stop reconciler!')
+ reconciler.stop()
+ self.assertEqual(1, reconciler.status())
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/unit/common/test_daemon.py b/test/unit/common/test_daemon.py
index bae3a146a..a42d64347 100644
--- a/test/unit/common/test_daemon.py
+++ b/test/unit/common/test_daemon.py
@@ -22,7 +22,8 @@ import unittest
from getpass import getuser
import logging
from test.unit import tmpfile
-from mock import patch
+import mock
+import signal
from swift.common import daemon, utils
@@ -83,10 +84,26 @@ class TestRunDaemon(unittest.TestCase):
d.run(once=True)
self.assertEqual(d.once_called, True)
+ def test_signal(self):
+ d = MyDaemon({})
+ with mock.patch('swift.common.daemon.signal') as mock_signal:
+ mock_signal.SIGTERM = signal.SIGTERM
+ d.run()
+ signal_args, kwargs = mock_signal.signal.call_args
+ sig, func = signal_args
+ self.assertEqual(sig, signal.SIGTERM)
+ with mock.patch('swift.common.daemon.os') as mock_os:
+ func()
+ self.assertEqual(mock_os.method_calls, [
+ mock.call.killpg(0, signal.SIGTERM),
+ # hard exit because bare except handlers can trap SystemExit
+ mock.call._exit(0)
+ ])
+
def test_run_daemon(self):
sample_conf = "[my-daemon]\nuser = %s\n" % getuser()
with tmpfile(sample_conf) as conf_file:
- with patch.dict('os.environ', {'TZ': ''}):
+ with mock.patch.dict('os.environ', {'TZ': ''}):
daemon.run_daemon(MyDaemon, conf_file)
self.assertEqual(MyDaemon.forever_called, True)
self.assertTrue(os.environ['TZ'] is not '')
@@ -94,17 +111,17 @@ class TestRunDaemon(unittest.TestCase):
self.assertEqual(MyDaemon.once_called, True)
# test raise in daemon code
- MyDaemon.run_once = MyDaemon.run_raise
- self.assertRaises(OSError, daemon.run_daemon, MyDaemon,
- conf_file, once=True)
+ with mock.patch.object(MyDaemon, 'run_once', MyDaemon.run_raise):
+ self.assertRaises(OSError, daemon.run_daemon, MyDaemon,
+ conf_file, once=True)
# test user quit
- MyDaemon.run_forever = MyDaemon.run_quit
sio = StringIO()
logger = logging.getLogger('server')
logger.addHandler(logging.StreamHandler(sio))
logger = utils.get_logger(None, 'server', log_route='server')
- daemon.run_daemon(MyDaemon, conf_file, logger=logger)
+ with mock.patch.object(MyDaemon, 'run_forever', MyDaemon.run_quit):
+ daemon.run_daemon(MyDaemon, conf_file, logger=logger)
self.assertTrue('user quit' in sio.getvalue().lower())