diff options
author | Dmitry Tantsur <dtantsur@protonmail.com> | 2022-01-26 14:59:34 +0100 |
---|---|---|
committer | Dmitry Tantsur <dtantsur@protonmail.com> | 2022-01-26 15:18:40 +0100 |
commit | 2c58ab3703675b43d9f197f609fc576e850ac0da (patch) | |
tree | 06a6d7441ae280add92ffeed84b131384ce7d8e7 | |
parent | e841fa05456ae8cfa35222fc6b787d9aadababe6 (diff) | |
download | ironic-2c58ab3703675b43d9f197f609fc576e850ac0da.tar.gz |
Wait for conductor start before notifying systemd
Currently, the launcher first notifies systemd, then starts checking the
services (RPC and WSGI). So any failures will be reported, but only
after systemd declares the service ready.
This change adds a polling loop to make sure RpcService.start() finishes
successfully.
Change-Id: Ib460622d69a9cb1cb82e796a6ab294bbbb40c359
-rw-r--r-- | ironic/cmd/conductor.py | 6 | ||||
-rw-r--r-- | ironic/cmd/singleprocess.py | 5 | ||||
-rw-r--r-- | ironic/common/rpc_service.py | 22 | ||||
-rw-r--r-- | ironic/tests/unit/common/test_rpc_service.py | 31 | ||||
-rw-r--r-- | releasenotes/notes/service-wait-e85cbe7978f61764.yaml | 5 |
5 files changed, 69 insertions, 0 deletions
diff --git a/ironic/cmd/conductor.py b/ironic/cmd/conductor.py index 5fa4c8489..a932cb22f 100644 --- a/ironic/cmd/conductor.py +++ b/ironic/cmd/conductor.py @@ -67,6 +67,12 @@ def main(): issue_startup_warnings(CONF) launcher = service.launch(CONF, mgr, restart_method='mutate') + + # NOTE(dtantsur): handling start-up failures before launcher.wait() helps + # notify systemd about them. Otherwise the launcher will report successful + # service start-up before checking the threads. + mgr.wait_for_start() + sys.exit(launcher.wait()) diff --git a/ironic/cmd/singleprocess.py b/ironic/cmd/singleprocess.py index 20a348ae5..675bd1bc2 100644 --- a/ironic/cmd/singleprocess.py +++ b/ironic/cmd/singleprocess.py @@ -49,4 +49,9 @@ def main(): wsgi = wsgi_service.WSGIService('ironic_api', CONF.api.enable_ssl_api) launcher.launch_service(wsgi) + # NOTE(dtantsur): handling start-up failures before launcher.wait() helps + # notify systemd about them. Otherwise the launcher will report successful + # service start-up before checking the threads. + mgr.wait_for_start() + sys.exit(launcher.wait()) diff --git a/ironic/common/rpc_service.py b/ironic/common/rpc_service.py index 78379c981..b0eec7758 100644 --- a/ironic/common/rpc_service.py +++ b/ironic/common/rpc_service.py @@ -15,6 +15,8 @@ # under the License. import signal +import sys +import time from ironic_lib.json_rpc import server as json_rpc from oslo_config import cfg @@ -42,9 +44,29 @@ class RPCService(service.Service): self.topic = self.manager.topic self.rpcserver = None self.deregister = True + self._failure = None + self._started = False + + def wait_for_start(self): + while not self._started and not self._failure: + time.sleep(0.1) + if self._failure: + LOG.critical(self._failure) + sys.exit(self._failure) def start(self): + self._failure = None + self._started = False super(RPCService, self).start() + try: + self._real_start() + except Exception as exc: + self._failure = f"{exc.__class__.__name__}: {exc}" + raise + else: + self._started = True + + def _real_start(self): admin_context = context.get_admin_context() serializer = objects_base.IronicObjectSerializer(is_server=True) diff --git a/ironic/tests/unit/common/test_rpc_service.py b/ironic/tests/unit/common/test_rpc_service.py index 4e190f5e6..8483bfb22 100644 --- a/ironic/tests/unit/common/test_rpc_service.py +++ b/ironic/tests/unit/common/test_rpc_service.py @@ -46,6 +46,8 @@ class TestRPCService(base.TestCase): mock_rpc, mock_ios, mock_target, mock_prepare_method): mock_rpc.return_value.start = mock.MagicMock() self.rpc_svc.handle_signal = mock.MagicMock() + self.assertFalse(self.rpc_svc._started) + self.assertFalse(self.rpc_svc._failure) self.rpc_svc.start() mock_ctx.assert_called_once_with() mock_target.assert_called_once_with(topic=self.rpc_svc.topic, @@ -55,6 +57,9 @@ class TestRPCService(base.TestCase): mock_init_method.assert_called_once_with(self.rpc_svc.manager, mock_ctx.return_value) self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager) + self.assertTrue(self.rpc_svc._started) + self.assertFalse(self.rpc_svc._failure) + self.rpc_svc.wait_for_start() # should be no-op @mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True) @mock.patch.object(oslo_messaging, 'Target', autospec=True) @@ -77,3 +82,29 @@ class TestRPCService(base.TestCase): mock_init_method.assert_called_once_with(self.rpc_svc.manager, mock_ctx.return_value) self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager) + + @mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True) + @mock.patch.object(oslo_messaging, 'Target', autospec=True) + @mock.patch.object(objects_base, 'IronicObjectSerializer', autospec=True) + @mock.patch.object(rpc, 'get_server', autospec=True) + @mock.patch.object(manager.ConductorManager, 'init_host', autospec=True) + @mock.patch.object(context, 'get_admin_context', autospec=True) + def test_start_failure(self, mock_ctx, mock_init_method, mock_rpc, + mock_ios, mock_target, mock_prepare_method): + mock_rpc.return_value.start = mock.MagicMock() + self.rpc_svc.handle_signal = mock.MagicMock() + mock_init_method.side_effect = RuntimeError("boom") + self.assertFalse(self.rpc_svc._started) + self.assertFalse(self.rpc_svc._failure) + self.assertRaises(RuntimeError, self.rpc_svc.start) + mock_ctx.assert_called_once_with() + mock_target.assert_called_once_with(topic=self.rpc_svc.topic, + server="fake_host") + mock_ios.assert_called_once_with(is_server=True) + mock_prepare_method.assert_called_once_with(self.rpc_svc.manager) + mock_init_method.assert_called_once_with(self.rpc_svc.manager, + mock_ctx.return_value) + self.assertIsNone(rpc.GLOBAL_MANAGER) + self.assertFalse(self.rpc_svc._started) + self.assertIn("boom", self.rpc_svc._failure) + self.assertRaises(SystemExit, self.rpc_svc.wait_for_start) diff --git a/releasenotes/notes/service-wait-e85cbe7978f61764.yaml b/releasenotes/notes/service-wait-e85cbe7978f61764.yaml new file mode 100644 index 000000000..c2fff601d --- /dev/null +++ b/releasenotes/notes/service-wait-e85cbe7978f61764.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + The ``ironic`` and ``ironic-conductor`` services now wait for the conductor + manager to start before notifying systemd about the successful start-up. |