author    Zuul <zuul@review.opendev.org>            2019-11-14 20:34:48 +0000
committer Gerrit Code Review <review@openstack.org> 2019-11-14 20:34:48 +0000
commit    9fa0b211a9b48c6fa00654386a615094f750a151 (patch)
tree      fddfbe3411070019eee35883799e003efe2b999f
parent    cf33b3dac78edd5830217dea87c126f14f4c6fce (diff)
parent    1107f24179c0c6fdcb58771f3a6e6c025352b5d3 (diff)
Merge "Seamlessly reload servers with SIGUSR1"
-rw-r--r--  doc/manpages/swift-init.1       |   1
-rw-r--r--  doc/source/admin_guide.rst      |  37
-rw-r--r--  swift/common/daemon.py          |   1
-rw-r--r--  swift/common/manager.py         |  20
-rw-r--r--  swift/common/utils.py           |  14
-rw-r--r--  swift/common/wsgi.py            | 184
-rw-r--r--  test/probe/test_signals.py      | 342
-rw-r--r--  test/unit/common/test_utils.py  |  54
-rw-r--r--  test/unit/common/test_wsgi.py   |  67
-rw-r--r--  tox.ini                         |   5
10 files changed, 553 insertions(+), 172 deletions(-)
diff --git a/doc/manpages/swift-init.1 b/doc/manpages/swift-init.1
index 95b67ab5c..c056e04fe 100644
--- a/doc/manpages/swift-init.1
+++ b/doc/manpages/swift-init.1
@@ -87,6 +87,7 @@ allows one to use the keywords such as "all", "main" and "rest" for the <server>
.IP "\fIno-wait\fR: \t\t\t spawn server and return immediately"
.IP "\fIonce\fR: \t\t\t start server and run one pass on supporting daemons"
.IP "\fIreload\fR: \t\t\t graceful shutdown then restart on supporting servers"
+.IP "\fIreload-seamless\fR: \t\t reload supporting servers with no downtime"
.IP "\fIrestart\fR: \t\t\t stops then restarts server"
.IP "\fIshutdown\fR: \t\t allow current requests to finish on supporting servers"
.IP "\fIstart\fR: \t\t\t starts a server"
diff --git a/doc/source/admin_guide.rst b/doc/source/admin_guide.rst
index d0b74f7b1..747bfdb11 100644
--- a/doc/source/admin_guide.rst
+++ b/doc/source/admin_guide.rst
@@ -1362,20 +1362,29 @@ Swift services are generally managed with ``swift-init``. The general usage is
``swift-init <service> <command>``, where service is the Swift service to
manage (for example object, container, account, proxy) and command is one of:
-========== ===============================================
-Command Description
----------- -----------------------------------------------
-start Start the service
-stop Stop the service
-restart Restart the service
-shutdown Attempt to gracefully shutdown the service
-reload Attempt to gracefully restart the service
-========== ===============================================
-
-A graceful shutdown or reload will finish any current requests before
-completely stopping the old service. There is also a special case of
-``swift-init all <command>``, which will run the command for all swift
-services.
+=============== ===============================================
+Command Description
+--------------- -----------------------------------------------
+start Start the service
+stop Stop the service
+restart Restart the service
+shutdown Attempt to gracefully shutdown the service
+reload Attempt to gracefully restart the service
+reload-seamless Attempt to seamlessly restart the service
+=============== ===============================================
+
+A graceful shutdown or reload will allow all server workers to finish any
+current requests before exiting. The parent server process exits immediately.
+
+A seamless reload will make new configuration settings active, with no window
+where client requests fail due to there being no active listen socket.
+The parent server process will re-exec itself, retaining its existing PID.
+After the re-exec'ed parent server process binds its listen sockets, the old
+listen sockets are closed and old server workers finish any current requests
+before exiting.
+
+There is also a special case of ``swift-init all <command>``, which will run
+the command for all swift services.
In cases where there are multiple configs for a service, a specific config
can be managed with ``swift-init <service>.<config> <command>``.
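
Usage sketch (not part of the patch; assumes a standard install with the
proxy server running): the new command is ``swift-init proxy-server
reload-seamless`` on the CLI, or, through the Manager API it is built on:

    # Sketch: trigger a seamless reload via the Manager API added in this
    # change. Requires a running proxy-server and permission to signal it.
    from swift.common.manager import Manager

    manager = Manager(['proxy-server'])
    status = manager.reload_seamless()  # sends SIGUSR1 to each parent PID
    if status:
        print('some servers were not running')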
diff --git a/swift/common/daemon.py b/swift/common/daemon.py
index 7e7c4cd8f..53b409909 100644
--- a/swift/common/daemon.py
+++ b/swift/common/daemon.py
@@ -132,6 +132,7 @@ class DaemonStrategy(object):
def setup(self, **kwargs):
utils.validate_configuration()
utils.drop_privileges(self.daemon.conf.get('user', 'swift'))
+ utils.clean_up_daemon_hygiene()
utils.capture_stdio(self.logger, **kwargs)
def kill_children(*args):
diff --git a/swift/common/manager.py b/swift/common/manager.py
index 47f47d03b..698fd4cb1 100644
--- a/swift/common/manager.py
+++ b/swift/common/manager.py
@@ -46,6 +46,7 @@ REST_SERVERS = [s for s in ALL_SERVERS if s not in MAIN_SERVERS]
# aliases mapping
ALIASES = {'all': ALL_SERVERS, 'main': MAIN_SERVERS, 'rest': REST_SERVERS}
GRACEFUL_SHUTDOWN_SERVERS = MAIN_SERVERS
+SEAMLESS_SHUTDOWN_SERVERS = MAIN_SERVERS
START_ONCE_SERVERS = REST_SERVERS
# These are servers that match a type (account-*, container-*, object-*) but
# don't use that type-server.conf file and instead use their own.
@@ -366,6 +367,21 @@ class Manager(object):
return status
@command
+ def reload_seamless(self, **kwargs):
+ """seamlessly re-exec, then shutdown of old listen sockets on
+ supporting servers
+ """
+ kwargs.pop('graceful', None)
+ kwargs['seamless'] = True
+ status = 0
+ for server in self.servers:
+ signaled_pids = server.stop(**kwargs)
+ if not signaled_pids:
+ print(_('No %s running') % server)
+ status += 1
+ return status
+
+ @command
def force_reload(self, **kwargs):
"""alias for reload
"""
@@ -628,13 +644,17 @@ class Server(object):
"""Kill running pids
:param graceful: if True, attempt SIGHUP on supporting servers
+ :param seamless: if True, attempt SIGUSR1 on supporting servers
:returns: a dict mapping pids (ints) to pid_files (paths)
"""
graceful = kwargs.get('graceful')
+ seamless = kwargs.get('seamless')
if graceful and self.server in GRACEFUL_SHUTDOWN_SERVERS:
sig = signal.SIGHUP
+ elif seamless and self.server in SEAMLESS_SHUTDOWN_SERVERS:
+ sig = signal.SIGUSR1
else:
sig = signal.SIGTERM
return self.signal_pids(sig, **kwargs)
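
For a single server type, the same effect can be sketched by calling
``Server.stop`` with ``seamless=True``, mirroring ``Manager.reload_seamless()``
above; per the signal selection in ``kill_running_pids``, this resolves to
SIGUSR1 for the main servers and falls back to SIGTERM otherwise:

    # Sketch (assumes a running object-server): seamless=True selects
    # SIGUSR1 for servers in SEAMLESS_SHUTDOWN_SERVERS.
    from swift.common.manager import Server

    server = Server('object')  # normalized to object-server
    signaled_pids = server.stop(seamless=True)
    if not signaled_pids:
        print('No %s running' % server)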
diff --git a/swift/common/utils.py b/swift/common/utils.py
index 51b656c5c..e4bb964bc 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -2452,7 +2452,7 @@ def get_hub():
return None
-def drop_privileges(user, call_setsid=True):
+def drop_privileges(user):
"""
Sets the userid/groupid of the current process and the HOME environment variable.
@@ -2465,11 +2465,13 @@ def drop_privileges(user, call_setsid=True):
os.setgid(user[3])
os.setuid(user[2])
os.environ['HOME'] = user[5]
- if call_setsid:
- try:
- os.setsid()
- except OSError:
- pass
+
+
+def clean_up_daemon_hygiene():
+ try:
+ os.setsid()
+ except OSError:
+ pass
os.chdir('/') # in case you need to rmdir where you started the daemon
os.umask(0o22) # ensure files are created with the correct privileges
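
The net effect for daemons is that privilege dropping and process hygiene are
now two separate, explicitly ordered steps, as in ``DaemonStrategy.setup()``
above. A sketch of the new call order:

    # Sketch: post-change daemon setup. drop_privileges() now handles only
    # setgroups/setgid/setuid and $HOME; setsid(), chdir('/') and
    # umask(0o22) live in clean_up_daemon_hygiene().
    from swift.common import utils

    utils.drop_privileges('swift')     # user/group IDs and HOME only
    utils.clean_up_daemon_hygiene()    # setsid, chdir('/'), umask(0o22)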
diff --git a/swift/common/wsgi.py b/swift/common/wsgi.py
index 10a8b3e56..c71d41286 100644
--- a/swift/common/wsgi.py
+++ b/swift/common/wsgi.py
@@ -18,15 +18,18 @@
from __future__ import print_function
import errno
+import fcntl
import os
import signal
-import time
from swift import gettext_ as _
+import sys
from textwrap import dedent
+import time
import eventlet
import eventlet.debug
-from eventlet import greenio, GreenPool, sleep, wsgi, listen, Timeout
+from eventlet import greenio, GreenPool, sleep, wsgi, listen, Timeout, \
+ websocket
from paste.deploy import loadwsgi
from eventlet.green import socket, ssl, os as green_os
from io import BytesIO
@@ -42,10 +45,11 @@ from swift.common.swob import Request, wsgi_quote, wsgi_unquote, \
from swift.common.utils import capture_stdio, disable_fallocate, \
drop_privileges, get_logger, NullLogger, config_true_value, \
validate_configuration, get_hub, config_auto_int_value, \
- reiterate
+ reiterate, clean_up_daemon_hygiene
SIGNUM_TO_NAME = {getattr(signal, n): n for n in dir(signal)
if n.startswith('SIG') and '_' not in n}
+NOTIFY_FD_ENV_KEY = '__SWIFT_SERVER_NOTIFY_FD'
# Set maximum line size of message headers to be accepted.
wsgi.MAX_HEADER_LINE = constraints.MAX_HEADER_SIZE
@@ -422,6 +426,13 @@ def load_app_config(conf_file):
class SwiftHttpProtocol(wsgi.HttpProtocol):
default_request_version = "HTTP/1.0"
+ def __init__(self, *args, **kwargs):
+ # See https://github.com/eventlet/eventlet/pull/590
+ self.pre_shutdown_bugfix_eventlet = not getattr(
+ websocket.WebSocketWSGI, '_WSGI_APP_ALWAYS_IDLE', None)
+ # Note this is not a new-style class, so super() won't work
+ wsgi.HttpProtocol.__init__(self, *args, **kwargs)
+
def log_request(self, *a):
"""
Turn off logging requests by the underlying WSGI software.
@@ -528,6 +539,23 @@ class SwiftHttpProtocol(wsgi.HttpProtocol):
b'HTTP/1.1 100 Continue\r\n'
return environ
+ def _read_request_line(self):
+ # Note this is not a new-style class, so super() won't work
+ got = wsgi.HttpProtocol._read_request_line(self)
+ # See https://github.com/eventlet/eventlet/pull/590
+ if self.pre_shutdown_bugfix_eventlet:
+ self.conn_state[2] = wsgi.STATE_REQUEST
+ return got
+
+ def handle_one_request(self):
+ # Note this is not a new-style class, so super() won't work
+ got = wsgi.HttpProtocol.handle_one_request(self)
+ # See https://github.com/eventlet/eventlet/pull/590
+ if self.pre_shutdown_bugfix_eventlet:
+ if self.conn_state[2] != wsgi.STATE_CLOSE:
+ self.conn_state[2] = wsgi.STATE_IDLE
+ return got
+
class SwiftHttpProxiedProtocol(SwiftHttpProtocol):
"""
@@ -662,7 +690,36 @@ def run_server(conf, logger, sock, global_conf=None):
pool.waitall()
-class WorkersStrategy(object):
+class StrategyBase(object):
+ """
+ Some operations common to all strategy classes.
+ """
+
+ def shutdown_sockets(self):
+ """
+ Shutdown any listen sockets.
+ """
+
+ for sock in self.iter_sockets():
+ greenio.shutdown_safe(sock)
+ sock.close()
+
+ def set_close_on_exec_on_listen_sockets(self):
+ """
+ Set the close-on-exec flag on any listen sockets.
+ """
+
+ for sock in self.iter_sockets():
+ if six.PY2:
+ fcntl.fcntl(sock.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ else:
+ # Python 3.4 and later default to sockets having close-on-exec
+ # set (what PEP 0446 calls "non-inheritable"). This new method
+ # on socket objects is provided to toggle it.
+ sock.set_inheritable(False)
+
+
+class WorkersStrategy(StrategyBase):
"""
WSGI server management strategy object for a single bind port and listen
socket shared by a configured number of forked-off workers.
@@ -695,8 +752,7 @@ class WorkersStrategy(object):
def do_bind_ports(self):
"""
- Bind the one listen socket for this strategy and drop privileges
- (since the parent process will never need to bind again).
+ Bind the one listen socket for this strategy.
"""
try:
@@ -705,7 +761,6 @@ class WorkersStrategy(object):
msg = 'bind_port wasn\'t properly set in the config file. ' \
'It must be explicitly set to a valid port number.'
return msg
- drop_privileges(self.conf.get('user', 'swift'))
def no_fork_sock(self):
"""
@@ -766,20 +821,27 @@ class WorkersStrategy(object):
"""
Called when a worker has exited.
+ NOTE: a re-exec'ed server can reap the dead worker PIDs from the old
+ server process that is being replaced as part of a service reload
+ (SIGUSR1). So we need to be robust to getting some unknown PID here.
+
:param int pid: The PID of the worker that exited.
"""
- self.logger.error('Removing dead child %s from parent %s',
- pid, os.getpid())
- self.children.remove(pid)
+ if pid in self.children:
+ self.logger.error('Removing dead child %s from parent %s',
+ pid, os.getpid())
+ self.children.remove(pid)
+ else:
+ self.logger.info('Ignoring wait() result from unknown PID %s', pid)
- def shutdown_sockets(self):
+ def iter_sockets(self):
"""
- Shutdown any listen sockets.
+ Yields all known listen sockets.
"""
- greenio.shutdown_safe(self.sock)
- self.sock.close()
+ if self.sock:
+ yield self.sock
class PortPidState(object):
@@ -901,7 +963,7 @@ class PortPidState(object):
self.sock_data_by_port[dead_port]['pids'][server_idx] = None
-class ServersPerPortStrategy(object):
+class ServersPerPortStrategy(StrategyBase):
"""
WSGI server management strategy object for an object-server with one listen
port per unique local port in the storage policy rings. The
@@ -948,28 +1010,13 @@ class ServersPerPortStrategy(object):
def do_bind_ports(self):
"""
- Bind one listen socket per unique local storage policy ring port. Then
- do all the work of drop_privileges except the actual dropping of
- privileges (each forked-off worker will do that post-fork in
- :py:meth:`post_fork_hook`).
+ Bind one listen socket per unique local storage policy ring port.
"""
self._reload_bind_ports()
for port in self.bind_ports:
self._bind_port(port)
- # The workers strategy drops privileges here, which we obviously cannot
- # do if we want to support binding to low ports. But we do want some
- # of the actions that drop_privileges did.
- try:
- os.setsid()
- except OSError:
- pass
- # In case you need to rmdir where you started the daemon:
- os.chdir('/')
- # Ensure files are created with the correct privileges:
- os.umask(0o22)
-
def no_fork_sock(self):
"""
This strategy does not support running in the foreground.
@@ -1030,7 +1077,7 @@ class ServersPerPortStrategy(object):
to drop privileges.
"""
- drop_privileges(self.conf.get('user', 'swift'), call_setsid=False)
+ drop_privileges(self.conf.get('user', 'swift'))
def log_sock_exit(self, sock, server_idx):
"""
@@ -1050,6 +1097,7 @@ class ServersPerPortStrategy(object):
:py:meth:`new_worker_socks`.
:param int pid: The new worker process' PID
"""
+
port = self.port_pid_state.port_for_sock(sock)
self.logger.notice('Started child %d (PID %d) for port %d',
server_idx, pid, port)
@@ -1064,14 +1112,13 @@ class ServersPerPortStrategy(object):
self.port_pid_state.forget_pid(pid)
- def shutdown_sockets(self):
+ def iter_sockets(self):
"""
- Shutdown any listen sockets.
+ Yields all known listen sockets.
"""
for sock in self.port_pid_state.all_socks():
- greenio.shutdown_safe(sock)
- sock.close()
+ yield sock
def run_wsgi(conf_path, app_section, *args, **kwargs):
@@ -1127,10 +1174,22 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
print(error_msg)
return 1
+ # Do some daemonization process hygiene before we fork any children or run a
+ # server without forking.
+ clean_up_daemon_hygiene()
+
# Redirect errors to logger and close stdio. Do this *after* binding ports;
# we use this to signal that the service is ready to accept connections.
capture_stdio(logger)
+ # If necessary, signal an old copy of us that it's okay to shutdown its
+ # listen sockets now because ours are up and ready to receive connections.
+ reexec_signal_fd = os.getenv(NOTIFY_FD_ENV_KEY)
+ if reexec_signal_fd:
+ reexec_signal_fd = int(reexec_signal_fd)
+ os.write(reexec_signal_fd, str(os.getpid()).encode('utf8'))
+ os.close(reexec_signal_fd)
+
no_fork_sock = strategy.no_fork_sock()
if no_fork_sock:
run_server(conf, logger, no_fork_sock, global_conf=global_conf)
@@ -1145,6 +1204,7 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
running_context = [True, None]
signal.signal(signal.SIGTERM, stop_with_signal)
signal.signal(signal.SIGHUP, stop_with_signal)
+ signal.signal(signal.SIGUSR1, stop_with_signal)
while running_context[0]:
for sock, sock_info in strategy.new_worker_socks():
@@ -1152,6 +1212,7 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
if pid == 0:
signal.signal(signal.SIGHUP, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
+ signal.signal(signal.SIGUSR1, signal.SIG_DFL)
strategy.post_fork_hook()
run_server(conf, logger, sock)
strategy.log_sock_exit(sock, sock_info)
@@ -1196,9 +1257,58 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
logger.error('Stopping with unexpected signal %r' %
running_context[1])
else:
- logger.error('%s received', signame)
+ logger.error('%s received (%s)', signame, os.getpid())
if running_context[1] == signal.SIGTERM:
os.killpg(0, signal.SIGTERM)
+ elif running_context[1] == signal.SIGUSR1:
+ # set up a pipe, fork off a child to handle cleanup later,
+ # and re-exec ourselves with an environment variable set which will
+ # indicate which fd (one of the pipe ends) to write a byte to
+ # to indicate listen socket setup is complete. That will signal
+ # the forked-off child to complete its listen socket shutdown.
+ #
+ # NOTE: all strategies will now require the parent process to retain
+ # superuser privileges so that the re-exec'ed process can bind a new
+ # socket to the configured IP & port(s). We can't just reuse existing
+ # listen sockets because then the bind IP couldn't be changed.
+ #
+ # NOTE: we need to set all our listen sockets close-on-exec so the only
+ # open reference to those file descriptors will be in the forked-off
+ # child here who waits to shutdown the old server's listen sockets. If
+ # the re-exec'ed server's old listen sockets aren't closed-on-exec,
+ # then the old server can't actually ever exit.
+ strategy.set_close_on_exec_on_listen_sockets()
+ read_fd, write_fd = os.pipe()
+ orig_server_pid = os.getpid()
+ child_pid = os.fork()
+ if child_pid:
+ # parent; set env var for fds and reexec ourselves
+ os.close(read_fd)
+ os.putenv(NOTIFY_FD_ENV_KEY, str(write_fd))
+ myself = os.path.realpath(sys.argv[0])
+ logger.info("Old server PID=%d re'execing as: %r",
+ orig_server_pid, [myself] + list(sys.argv))
+ os.execv(myself, sys.argv)
+ logger.error('Somehow lived past os.execv()?!')
+ exit('Somehow lived past os.execv()?!')
+ elif child_pid == 0:
+ # child
+ os.close(write_fd)
+ logger.info('Old server temporary child PID=%d waiting for '
+ "re-exec'ed PID=%d to signal readiness...",
+ os.getpid(), orig_server_pid)
+ try:
+ got_pid = os.read(read_fd, 30)
+ logger.info('Old server temporary child PID=%d notified '
+ 'to shutdown old listen sockets by PID=%s',
+ os.getpid(), got_pid)
+ except Exception as e:
+ logger.warning('Unexpected exception while reading from '
+ 'pipe:', exc_info=True)
+ try:
+ os.close(read_fd)
+ except Exception:
+ pass
strategy.shutdown_sockets()
signal.signal(signal.SIGTERM, signal.SIG_IGN)
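
The notify-fd handshake above can be illustrated outside of Swift. A minimal
standalone sketch (assumptions: Python 3, a hypothetical env var name, and the
pipe's write end made inheritable explicitly, since PEP 446 makes descriptors
non-inheritable by default):

    # Standalone sketch of the re-exec readiness handshake (not Swift code).
    # The old process forks a waiter child, re-execs itself with the pipe's
    # write end advertised in an env var, and the new process writes its PID
    # to that fd once it is ready.
    import os
    import sys

    NOTIFY_FD_ENV_KEY = '__NOTIFY_FD'  # hypothetical stand-in for Swift's key

    def main():
        notify_fd = os.getenv(NOTIFY_FD_ENV_KEY)
        if notify_fd:
            # Re-exec'ed copy: same PID as before, new code and config.
            # Tell the waiter child it can shut down the old sockets.
            os.write(int(notify_fd), str(os.getpid()).encode('utf8'))
            os.close(int(notify_fd))
            print('new server PID=%d is ready' % os.getpid())
            return

        read_fd, write_fd = os.pipe()
        if os.fork():
            # Old parent: keep our PID, re-exec in place.
            os.close(read_fd)
            os.set_inheritable(write_fd, True)  # survive the exec (PEP 446)
            os.putenv(NOTIFY_FD_ENV_KEY, str(write_fd))
            os.execv(sys.executable, [sys.executable] + sys.argv)
        else:
            # Temporary child: wait for the new server, then clean up.
            os.close(write_fd)
            got_pid = os.read(read_fd, 30)
            print('waiter PID=%d: new server PID=%s is up; old listen '
                  'sockets may now be closed' % (os.getpid(), got_pid.decode()))
            os.close(read_fd)

    if __name__ == '__main__':
        main()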
diff --git a/test/probe/test_signals.py b/test/probe/test_signals.py
index bfc6e299a..dbb3b01f4 100644
--- a/test/probe/test_signals.py
+++ b/test/probe/test_signals.py
@@ -16,10 +16,14 @@
import unittest
-import random
from contextlib import contextmanager
-
import eventlet
+import json
+import os
+import random
+import shutil
+import time
+from uuid import uuid4
from six.moves import http_client as httplib
@@ -27,7 +31,7 @@ from swift.common.storage_policy import POLICIES
from swift.common.ring import Ring
from swift.common.manager import Manager
-from test.probe.common import resetswift
+from test.probe.common import resetswift, ReplProbeTest, client
def putrequest(conn, method, path, headers):
@@ -39,77 +43,311 @@ def putrequest(conn, method, path, headers):
conn.endheaders()
-class TestWSGIServerProcessHandling(unittest.TestCase):
+def get_server_and_worker_pids(manager, old_workers=None):
+ # Gets all the server parent pids, as well as the set of all worker PIDs
+ # (i.e. any PID whose PPID is in the set of parent pids).
+ server_pid_set = {pid for server in manager.servers
+ for (_, pid) in server.iter_pid_files()}
+ children_pid_set = set()
+ old_worker_pid_set = set(old_workers or [])
+ all_pids = [int(f) for f in os.listdir('/proc') if f.isdigit()]
+ for pid in all_pids:
+ try:
+ with open('/proc/%d/status' % pid, 'r') as fh:
+ for line in fh:
+ if line.startswith('PPid:\t'):
+ ppid = int(line[6:])
+ if ppid in server_pid_set or pid in old_worker_pid_set:
+ children_pid_set.add(pid)
+ break
+ except Exception:
+ # No big deal, a process could have exited since we listed /proc,
+ # so we just ignore errors
+ pass
+ return {'server': server_pid_set, 'worker': children_pid_set}
+
+
+def wait_for_pids(manager, callback, timeout=15, old_workers=None):
+ # Waits up to `timeout` seconds for the supplied callback to return True
+ # when passed in the manager's pid set.
+ start_time = time.time()
+
+ pid_sets = get_server_and_worker_pids(manager, old_workers=old_workers)
+ got = callback(pid_sets)
+ while not got and time.time() - start_time < timeout:
+ time.sleep(0.1)
+ pid_sets = get_server_and_worker_pids(manager, old_workers=old_workers)
+ got = callback(pid_sets)
+ if time.time() - start_time >= timeout:
+ raise AssertionError('timed out waiting for PID state; got %r' % (
+ pid_sets))
+ return pid_sets
+
+
+class TestWSGIServerProcessHandling(ReplProbeTest):
+ # Subclasses need to define SERVER_NAME
+ HAS_INFO = False
+ PID_TIMEOUT = 25
def setUp(self):
- resetswift()
-
- def _check_reload(self, server_name, ip, port):
- manager = Manager([server_name])
- manager.start()
+ super(TestWSGIServerProcessHandling, self).setUp()
+ self.container = 'container-%s' % uuid4()
+ client.put_container(self.url, self.token, self.container,
+ headers={'X-Storage-Policy':
+ self.policy.name})
+ self.manager = Manager([self.SERVER_NAME])
+ for server in self.manager.servers:
+ self.assertTrue(server.get_running_pids,
+ 'No running PIDs for %s' % server.cmd)
+ self.starting_pids = get_server_and_worker_pids(self.manager)
+
+ def assert4xx(self, resp):
+ self.assertEqual(resp.status // 100, 4)
+ got_body = resp.read()
+ try:
+ self.assertIn('resource could not be found', got_body)
+ except AssertionError:
+ self.assertIn('Invalid path: blah', got_body)
- starting_pids = {pid for server in manager.servers
- for (_, pid) in server.iter_pid_files()}
+ def get_conn(self):
+ ip, port = self.get_ip_port()
+ return httplib.HTTPConnection('%s:%s' % (ip, port))
- body = b'test' * 10
- conn = httplib.HTTPConnection('%s:%s' % (ip, port))
+ def _check_reload(self):
+ conn = self.get_conn()
+ self.addCleanup(conn.close)
# sanity request
- putrequest(conn, 'PUT', 'blah',
- headers={'Content-Length': len(body)})
- conn.send(body)
- resp = conn.getresponse()
- self.assertEqual(resp.status // 100, 4)
- resp.read()
+ self.start_write_req(conn, 'sanity')
+ resp = self.finish_write_req(conn)
+ self.check_write_resp(resp)
- # Start the request before reloading...
- putrequest(conn, 'PUT', 'blah',
- headers={'Content-Length': len(body)})
+ if self.HAS_INFO:
+ self.check_info_value(8192)
- manager.reload()
+ # Start another write request before reloading...
+ self.start_write_req(conn, 'across-reload')
- post_reload_pids = {pid for server in manager.servers
- for (_, pid) in server.iter_pid_files()}
+ if self.HAS_INFO:
+ self.swap_configs() # new server's max_header_size == 8191
- # none of the pids we started with are being tracked after reload
- msg = 'expected all pids from %r to have died, but found %r' % (
- starting_pids, post_reload_pids)
- self.assertFalse(starting_pids & post_reload_pids, msg)
+ self.do_reload()
- # ... and make sure we can finish what we were doing, and even
- # start part of a new request
- conn.send(body)
- resp = conn.getresponse()
- self.assertEqual(resp.status // 100, 4)
- # We can even read the body
- self.assertTrue(resp.read())
+ wait_for_pids(self.manager, self.make_post_reload_pid_cb(),
+ old_workers=self.starting_pids['worker'],
+ timeout=self.PID_TIMEOUT)
+
+ # ... and make sure we can finish what we were doing
+ resp = self.finish_write_req(conn)
+ self.check_write_resp(resp)
# After this, we're in a funny spot. With eventlet 0.22.0, the
# connection's now closed, but with prior versions we could keep
# going indefinitely. See https://bugs.launchpad.net/swift/+bug/1792615
- # Close our connection, to make sure old eventlet shuts down
+ # Close our connections, to make sure old eventlet shuts down
conn.close()
# sanity
- post_close_pids = {pid for server in manager.servers
- for (_, pid) in server.iter_pid_files()}
- self.assertEqual(post_reload_pids, post_close_pids)
+ wait_for_pids(self.manager, self.make_post_close_pid_cb(),
+ old_workers=self.starting_pids['worker'],
+ timeout=self.PID_TIMEOUT)
+
+ if self.HAS_INFO:
+ self.check_info_value(8191)
+
+
+class OldReloadMixin(object):
+ def make_post_reload_pid_cb(self):
+ def _cb(post_reload_pids):
+ # We expect all old server PIDs to be gone, a new server present,
+ # and for there to be exactly 1 old worker PID plus additional new
+ # worker PIDs.
+ old_servers_dead = not (self.starting_pids['server'] &
+ post_reload_pids['server'])
+ one_old_worker = 1 == len(self.starting_pids['worker'] &
+ post_reload_pids['worker'])
+ new_workers_present = (post_reload_pids['worker'] -
+ self.starting_pids['worker'])
+ return (post_reload_pids['server'] and old_servers_dead and
+ one_old_worker and new_workers_present)
+ return _cb
+
+ def make_post_close_pid_cb(self):
+ def _cb(post_close_pids):
+ # We expect all old server PIDs to be gone, a new server present,
+ # no old worker PIDs, and additional new worker PIDs.
+ old_servers_dead = not (self.starting_pids['server'] &
+ post_close_pids['server'])
+ old_workers_dead = not (self.starting_pids['worker'] &
+ post_close_pids['worker'])
+ new_workers_present = (post_close_pids['worker'] -
+ self.starting_pids['worker'])
+ return (post_close_pids['server'] and old_servers_dead and
+ old_workers_dead and new_workers_present)
+ return _cb
+
+ def do_reload(self):
+ self.manager.reload()
+
+
+class SeamlessReloadMixin(object):
+ def make_post_reload_pid_cb(self):
+ def _cb(post_reload_pids):
+ # We expect all orig server PIDs to STILL BE PRESENT, no new server
+ # present, and for there to be exactly 1 old worker PID plus
+ # additional new worker PIDs.
+ same_servers = (self.starting_pids['server'] ==
+ post_reload_pids['server'])
+ one_old_worker = 1 == len(self.starting_pids['worker'] &
+ post_reload_pids['worker'])
+ new_workers_present = (post_reload_pids['worker'] -
+ self.starting_pids['worker'])
+ return (post_reload_pids['server'] and same_servers and
+ one_old_worker and new_workers_present)
+ return _cb
+
+ def make_post_close_pid_cb(self):
+ def _cb(post_close_pids):
+ # We expect all orig server PIDs to STILL BE PRESENT, no new server
+ # present, no old worker PIDs, and additional new worker PIDs.
+ same_servers = (self.starting_pids['server'] ==
+ post_close_pids['server'])
+ old_workers_dead = not (self.starting_pids['worker'] &
+ post_close_pids['worker'])
+ new_workers_present = (post_close_pids['worker'] -
+ self.starting_pids['worker'])
+ return (post_close_pids['server'] and same_servers and
+ old_workers_dead and new_workers_present)
+ return _cb
+
+ def do_reload(self):
+ self.manager.reload_seamless()
+
+
+class TestObjectServerReloadBase(TestWSGIServerProcessHandling):
+ SERVER_NAME = 'object'
+ PID_TIMEOUT = 35
+
+ def get_ip_port(self):
+ policy = random.choice(list(POLICIES))
+ policy.load_ring('/etc/swift')
+ self.ring_node = random.choice(policy.object_ring.get_part_nodes(1))
+ return self.ring_node['ip'], self.ring_node['port']
+
+ def start_write_req(self, conn, suffix):
+ putrequest(conn, 'PUT', '/%s/123/%s/%s/blah-%s' % (
+ self.ring_node['device'], self.account, self.container, suffix),
+ headers={'X-Timestamp': str(time.time()),
+ 'Content-Type': 'application/octet-string',
+ 'Content-Length': len(self.BODY)})
+
+ def finish_write_req(self, conn):
+ conn.send(self.BODY)
+ return conn.getresponse()
+
+ def check_write_resp(self, resp):
+ got_body = resp.read()
+ self.assertEqual(resp.status // 100, 2, 'Got status %d; %r' %
+ (resp.status, got_body))
+ self.assertEqual('', got_body)
+ return resp
- def test_proxy_reload(self):
- self._check_reload('proxy-server', 'localhost', 8080)
+
+class TestObjectServerReload(OldReloadMixin, TestObjectServerReloadBase):
+ BODY = 'test-object' * 10
def test_object_reload(self):
- policy = random.choice(list(POLICIES))
- policy.load_ring('/etc/swift')
- node = random.choice(policy.object_ring.get_part_nodes(1))
- self._check_reload('object', node['ip'], node['port'])
-
- def test_account_container_reload(self):
- for server in ('account', 'container'):
- ring = Ring('/etc/swift', ring_name=server)
- node = random.choice(ring.get_part_nodes(1))
- self._check_reload(server, node['ip'], node['port'])
+ self._check_reload()
+
+
+class TestObjectServerReloadSeamless(SeamlessReloadMixin,
+ TestObjectServerReloadBase):
+ BODY = 'test-object' * 10
+
+ def test_object_reload_seamless(self):
+ self._check_reload()
+
+
+class TestProxyServerReloadBase(TestWSGIServerProcessHandling):
+ SERVER_NAME = 'proxy-server'
+ HAS_INFO = True
+
+ def setUp(self):
+ super(TestProxyServerReloadBase, self).setUp()
+ self.swift_conf_path = '/etc/swift/swift.conf'
+ self.new_swift_conf_path = self.swift_conf_path + '.new'
+ self.saved_swift_conf_path = self.swift_conf_path + '.orig'
+ shutil.copy(self.swift_conf_path, self.saved_swift_conf_path)
+ shutil.copy(self.swift_conf_path, self.new_swift_conf_path)
+ with open(self.new_swift_conf_path, 'a+') as fh:
+ fh.seek(0, os.SEEK_END)
+ fh.write('\n[swift-constraints]\nmax_header_size = 8191\n')
+ fh.flush()
+
+ def tearDown(self):
+ shutil.move(self.saved_swift_conf_path, self.swift_conf_path)
+ try:
+ os.unlink(self.new_swift_conf_path)
+ except OSError:
+ pass
+ super(TestProxyServerReloadBase, self).tearDown()
+
+ def swap_configs(self):
+ shutil.copy(self.new_swift_conf_path, self.swift_conf_path)
+
+ def get_ip_port(self):
+ return 'localhost', 8080
+
+ def assertMaxHeaderSize(self, resp, exp_max_header_size):
+ self.assertEqual(resp.status // 100, 2)
+ info_dict = json.loads(resp.read())
+ self.assertEqual(exp_max_header_size,
+ info_dict['swift']['max_header_size'])
+
+ def check_info_value(self, expected_value):
+ # show that we're talking to the original server with the default
+ # max_header_size == 8192
+ conn2 = self.get_conn()
+ putrequest(conn2, 'GET', '/info',
+ headers={'Content-Length': '0',
+ 'Accept': 'application/json'})
+ conn2.send('')
+ resp = conn2.getresponse()
+ self.assertMaxHeaderSize(resp, expected_value)
+ conn2.close()
+
+ def start_write_req(self, conn, suffix):
+ putrequest(conn, 'PUT', '/v1/%s/%s/blah-%s' % (
+ self.account, self.container, suffix),
+ headers={'X-Auth-Token': self.token,
+ 'Content-Length': len(self.BODY)})
+
+ def finish_write_req(self, conn):
+ conn.send(self.BODY)
+ return conn.getresponse()
+
+ def check_write_resp(self, resp):
+ got_body = resp.read()
+ self.assertEqual(resp.status // 100, 2, 'Got status %d; %r' %
+ (resp.status, got_body))
+ self.assertEqual('', got_body)
+ return resp
+
+
+class TestProxyServerReload(OldReloadMixin, TestProxyServerReloadBase):
+ BODY = 'proxy' * 10
+
+ def test_proxy_reload(self):
+ self._check_reload()
+
+
+class TestProxyServerReloadSeamless(SeamlessReloadMixin,
+ TestProxyServerReloadBase):
+ BODY = 'proxy-seamless' * 10
+
+ def test_proxy_reload_seamless(self):
+ self._check_reload()
@contextmanager
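
The invariant these probe tests assert can also be checked by hand against a
running cluster. A sketch (assumes an SAIO-style install with a running
proxy-server and permission to signal it): across a seamless reload, the
parent server PIDs must not change.

    # Sketch: manually verify the seamless-reload invariant.
    import os
    import signal
    import time

    from swift.common.manager import Manager

    proxy = Manager(['proxy-server'])
    before = {pid for server in proxy.servers
              for (_, pid) in server.iter_pid_files()}
    for pid in before:
        os.kill(pid, signal.SIGUSR1)   # what reload-seamless sends
    time.sleep(2)                      # allow the re-exec to settle
    after = {pid for server in proxy.servers
             for (_, pid) in server.iter_pid_files()}
    assert before == after, 'parent PIDs changed across seamless reload'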
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index 448ae66a8..9d4657cca 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -2285,15 +2285,16 @@ log_name = %(yarr)s'''
}
self.assertEqual(conf, expected)
- def _check_drop_privileges(self, mock_os, required_func_calls,
- call_setsid=True):
+ def test_drop_privileges(self):
+ required_func_calls = ('setgroups', 'setgid', 'setuid')
+ mock_os = MockOs(called_funcs=required_func_calls)
user = getuser()
user_data = pwd.getpwnam(user)
self.assertFalse(mock_os.called_funcs) # sanity check
# over-ride os with mock
with mock.patch('swift.common.utils.os', mock_os):
# exercise the code
- utils.drop_privileges(user, call_setsid=call_setsid)
+ utils.drop_privileges(user)
for func in required_func_calls:
self.assertIn(func, mock_os.called_funcs)
@@ -2302,34 +2303,41 @@ log_name = %(yarr)s'''
self.assertEqual(groups, set(mock_os.called_funcs['setgroups'][0]))
self.assertEqual(user_data[3], mock_os.called_funcs['setgid'][0])
self.assertEqual(user_data[2], mock_os.called_funcs['setuid'][0])
- self.assertEqual('/', mock_os.called_funcs['chdir'][0])
- self.assertEqual(0o22, mock_os.called_funcs['umask'][0])
- def test_drop_privileges(self):
- required_func_calls = ('setgroups', 'setgid', 'setuid', 'setsid',
- 'chdir', 'umask')
+ def test_drop_privileges_no_setgroups(self):
+ required_func_calls = ('geteuid', 'setgid', 'setuid')
mock_os = MockOs(called_funcs=required_func_calls)
- self._check_drop_privileges(mock_os, required_func_calls)
+ user = getuser()
+ user_data = pwd.getpwnam(user)
+ self.assertFalse(mock_os.called_funcs) # sanity check
+ # over-ride os with mock
+ with mock.patch('swift.common.utils.os', mock_os):
+ # exercise the code
+ utils.drop_privileges(user)
- def test_drop_privileges_setsid_error(self):
- # OSError trying to get session leader
- required_func_calls = ('setgroups', 'setgid', 'setuid', 'setsid',
- 'chdir', 'umask')
- mock_os = MockOs(called_funcs=required_func_calls,
- raise_funcs=('setsid',))
- self._check_drop_privileges(mock_os, required_func_calls)
+ for func in required_func_calls:
+ self.assertIn(func, mock_os.called_funcs)
+ self.assertNotIn('setgroups', mock_os.called_funcs)
+ self.assertEqual(user_data[5], mock_os.environ['HOME'])
+ self.assertEqual(user_data[3], mock_os.called_funcs['setgid'][0])
+ self.assertEqual(user_data[2], mock_os.called_funcs['setuid'][0])
- def test_drop_privileges_no_call_setsid(self):
- required_func_calls = ('setgroups', 'setgid', 'setuid', 'chdir',
- 'umask')
- # OSError if trying to get session leader, but it shouldn't be called
+ def test_clean_up_daemon_hygene(self):
+ required_func_calls = ('chdir', 'umask')
+ # OSError if trying to get session leader, but setsid() OSError is
+ # ignored by the code under test.
bad_func_calls = ('setsid',)
mock_os = MockOs(called_funcs=required_func_calls,
raise_funcs=bad_func_calls)
- self._check_drop_privileges(mock_os, required_func_calls,
- call_setsid=False)
+ with mock.patch('swift.common.utils.os', mock_os):
+ # exercise the code
+ utils.clean_up_daemon_hygiene()
+ for func in required_func_calls:
+ self.assertIn(func, mock_os.called_funcs)
for func in bad_func_calls:
- self.assertNotIn(func, mock_os.called_funcs)
+ self.assertIn(func, mock_os.called_funcs)
+ self.assertEqual('/', mock_os.called_funcs['chdir'][0])
+ self.assertEqual(0o22, mock_os.called_funcs['umask'][0])
@reset_logger_state
def test_capture_stdio(self):
diff --git a/test/unit/common/test_wsgi.py b/test/unit/common/test_wsgi.py
index dd3ceff25..b72c41550 100644
--- a/test/unit/common/test_wsgi.py
+++ b/test/unit/common/test_wsgi.py
@@ -836,7 +836,8 @@ class TestWSGI(unittest.TestCase):
with mock.patch.object(wsgi, '_initrp', _initrp), \
mock.patch.object(wsgi, 'get_socket'), \
- mock.patch.object(wsgi, 'drop_privileges'), \
+ mock.patch.object(wsgi, 'drop_privileges') as _d_privs, \
+ mock.patch.object(wsgi, 'clean_up_daemon_hygiene') as _c_hyg, \
mock.patch.object(wsgi, 'loadapp', _loadapp), \
mock.patch.object(wsgi, 'capture_stdio'), \
mock.patch.object(wsgi, 'run_server'), \
@@ -849,6 +850,10 @@ class TestWSGI(unittest.TestCase):
socket=True,
select=True,
thread=True)
+ # run_wsgi() no longer calls drop_privileges() in the parent process,
+ # just clean_up_daemon_hygiene()
+ self.assertEqual([], _d_privs.mock_calls)
+ self.assertEqual([mock.call()], _c_hyg.mock_calls)
@mock.patch('swift.common.wsgi.run_server')
@mock.patch('swift.common.wsgi.WorkersStrategy')
@@ -1353,36 +1358,11 @@ class TestServersPerPortStrategy(unittest.TestCase):
6006, self.strategy.port_pid_state.port_for_sock(self.s1))
self.assertEqual(
6007, self.strategy.port_pid_state.port_for_sock(self.s2))
- self.assertEqual([mock.call()], self.mock_setsid.mock_calls)
- self.assertEqual([mock.call('/')], self.mock_chdir.mock_calls)
- self.assertEqual([mock.call(0o22)], self.mock_umask.mock_calls)
-
- def test_bind_ports_ignores_setsid_errors(self):
- self.mock_setsid.side_effect = OSError()
- self.strategy.do_bind_ports()
-
- self.assertEqual(set((6006, 6007)), self.strategy.bind_ports)
- self.assertEqual([
- mock.call({'workers': 100, # ignored
- 'user': 'bob',
- 'swift_dir': '/jim/cricket',
- 'ring_check_interval': '76',
- 'bind_ip': '2.3.4.5',
- 'bind_port': 6006}),
- mock.call({'workers': 100, # ignored
- 'user': 'bob',
- 'swift_dir': '/jim/cricket',
- 'ring_check_interval': '76',
- 'bind_ip': '2.3.4.5',
- 'bind_port': 6007}),
- ], self.mock_get_socket.mock_calls)
- self.assertEqual(
- 6006, self.strategy.port_pid_state.port_for_sock(self.s1))
- self.assertEqual(
- 6007, self.strategy.port_pid_state.port_for_sock(self.s2))
- self.assertEqual([mock.call()], self.mock_setsid.mock_calls)
- self.assertEqual([mock.call('/')], self.mock_chdir.mock_calls)
- self.assertEqual([mock.call(0o22)], self.mock_umask.mock_calls)
+ # strategy binding no longer does clean_up_daemon_hygiene() actions, the
+ # user of the strategy does.
+ self.assertEqual([], self.mock_setsid.mock_calls)
+ self.assertEqual([], self.mock_chdir.mock_calls)
+ self.assertEqual([], self.mock_umask.mock_calls)
def test_no_fork_sock(self):
self.assertIsNone(self.strategy.no_fork_sock())
@@ -1519,7 +1499,7 @@ class TestServersPerPortStrategy(unittest.TestCase):
self.strategy.post_fork_hook()
self.assertEqual([
- mock.call('bob', call_setsid=False),
+ mock.call('bob'),
], self.mock_drop_privileges.mock_calls)
def test_shutdown_sockets(self):
@@ -1555,6 +1535,9 @@ class TestWorkersStrategy(unittest.TestCase):
patcher = mock.patch('swift.common.wsgi.drop_privileges')
self.mock_drop_privileges = patcher.start()
self.addCleanup(patcher.stop)
+ patcher = mock.patch('swift.common.wsgi.clean_up_daemon_hygiene')
+ self.mock_clean_up_daemon_hygene = patcher.start()
+ self.addCleanup(patcher.stop)
def test_loop_timeout(self):
# This strategy should sit in the green.os.wait() for a bit (to avoid
@@ -1569,9 +1552,10 @@ class TestWorkersStrategy(unittest.TestCase):
self.assertEqual([
mock.call(self.conf),
], self.mock_get_socket.mock_calls)
- self.assertEqual([
- mock.call('bob'),
- ], self.mock_drop_privileges.mock_calls)
+ # strategy binding no longer drops privileges nor does
+ # clean_up_daemon_hygiene() actions.
+ self.assertEqual([], self.mock_drop_privileges.mock_calls)
+ self.assertEqual([], self.mock_clean_up_daemon_hygene.mock_calls)
self.mock_get_socket.side_effect = wsgi.ConfigFilePortError()
@@ -1643,9 +1627,16 @@ class TestWorkersStrategy(unittest.TestCase):
self.assertEqual([
mock.call.shutdown_safe(self.mock_get_socket.return_value),
], mock_greenio.mock_calls)
- self.assertEqual([
- mock.call.close(),
- ], self.mock_get_socket.return_value.mock_calls)
+ if six.PY2:
+ self.assertEqual([
+ mock.call.__nonzero__(),
+ mock.call.close(),
+ ], self.mock_get_socket.return_value.mock_calls)
+ else:
+ self.assertEqual([
+ mock.call.__bool__(),
+ mock.call.close(),
+ ], self.mock_get_socket.return_value.mock_calls)
def test_log_sock_exit(self):
self.strategy.log_sock_exit('blahblah', 'blahblah')
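
For reference, the per-socket close-on-exec operation introduced in
``StrategyBase`` above reduces to the following (sketch mirroring the
PY2/PY3 split in ``set_close_on_exec_on_listen_sockets()``):

    # Sketch: mark a listen socket close-on-exec so it is not inherited
    # across os.execv() by the re-exec'ed server.
    import fcntl
    import socket

    import six

    sock = socket.socket()
    if six.PY2:
        fcntl.fcntl(sock.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
    else:
        sock.set_inheritable(False)  # PEP 446: "non-inheritable"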
diff --git a/tox.ini b/tox.ini
index 18567359d..ed056c1e1 100644
--- a/tox.ini
+++ b/tox.ini
@@ -9,12 +9,13 @@ install_command = pip install -U {opts} {packages}
setenv = VIRTUAL_ENV={envdir}
NOSE_WITH_COVERAGE=1
NOSE_COVER_BRANCHES=1
+ NOSE_COVER_HTML_DIR={toxinidir}/cover
deps =
-c{env:UPPER_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master}
-r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
-commands = find . ( -type f -o -type l ) -name "*.py[co]" -delete
- find . -type d -name "__pycache__" -delete
+commands = find {envdir} ( -type f -o -type l ) -name "*.py[co]" -delete
+ find {envdir} -type d -name "__pycache__" -delete
nosetests {posargs:test/unit}
whitelist_externals = find
rm