summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Tantsur <dtantsur@protonmail.com>2022-01-26 14:59:34 +0100
committerDmitry Tantsur <dtantsur@protonmail.com>2022-01-26 15:18:40 +0100
commit2c58ab3703675b43d9f197f609fc576e850ac0da (patch)
tree06a6d7441ae280add92ffeed84b131384ce7d8e7
parente841fa05456ae8cfa35222fc6b787d9aadababe6 (diff)
downloadironic-2c58ab3703675b43d9f197f609fc576e850ac0da.tar.gz
Wait for conductor start before notifying systemd
Currently, the launcher first notifies systemd, then starts checking the services (RPC and WSGI). So any failures will be reported, but only after systemd declares the service ready. This change adds a polling loop to make sure RpcService.start() finishes successfully. Change-Id: Ib460622d69a9cb1cb82e796a6ab294bbbb40c359
-rw-r--r--ironic/cmd/conductor.py6
-rw-r--r--ironic/cmd/singleprocess.py5
-rw-r--r--ironic/common/rpc_service.py22
-rw-r--r--ironic/tests/unit/common/test_rpc_service.py31
-rw-r--r--releasenotes/notes/service-wait-e85cbe7978f61764.yaml5
5 files changed, 69 insertions, 0 deletions
diff --git a/ironic/cmd/conductor.py b/ironic/cmd/conductor.py
index 5fa4c8489..a932cb22f 100644
--- a/ironic/cmd/conductor.py
+++ b/ironic/cmd/conductor.py
@@ -67,6 +67,12 @@ def main():
issue_startup_warnings(CONF)
launcher = service.launch(CONF, mgr, restart_method='mutate')
+
+ # NOTE(dtantsur): handling start-up failures before launcher.wait() helps
+ # notify systemd about them. Otherwise the launcher will report successful
+ # service start-up before checking the threads.
+ mgr.wait_for_start()
+
sys.exit(launcher.wait())
diff --git a/ironic/cmd/singleprocess.py b/ironic/cmd/singleprocess.py
index 20a348ae5..675bd1bc2 100644
--- a/ironic/cmd/singleprocess.py
+++ b/ironic/cmd/singleprocess.py
@@ -49,4 +49,9 @@ def main():
wsgi = wsgi_service.WSGIService('ironic_api', CONF.api.enable_ssl_api)
launcher.launch_service(wsgi)
+ # NOTE(dtantsur): handling start-up failures before launcher.wait() helps
+ # notify systemd about them. Otherwise the launcher will report successful
+ # service start-up before checking the threads.
+ mgr.wait_for_start()
+
sys.exit(launcher.wait())
diff --git a/ironic/common/rpc_service.py b/ironic/common/rpc_service.py
index 78379c981..b0eec7758 100644
--- a/ironic/common/rpc_service.py
+++ b/ironic/common/rpc_service.py
@@ -15,6 +15,8 @@
# under the License.
import signal
+import sys
+import time
from ironic_lib.json_rpc import server as json_rpc
from oslo_config import cfg
@@ -42,9 +44,29 @@ class RPCService(service.Service):
self.topic = self.manager.topic
self.rpcserver = None
self.deregister = True
+ self._failure = None
+ self._started = False
+
+ def wait_for_start(self):
+ while not self._started and not self._failure:
+ time.sleep(0.1)
+ if self._failure:
+ LOG.critical(self._failure)
+ sys.exit(self._failure)
def start(self):
+ self._failure = None
+ self._started = False
super(RPCService, self).start()
+ try:
+ self._real_start()
+ except Exception as exc:
+ self._failure = f"{exc.__class__.__name__}: {exc}"
+ raise
+ else:
+ self._started = True
+
+ def _real_start(self):
admin_context = context.get_admin_context()
serializer = objects_base.IronicObjectSerializer(is_server=True)
diff --git a/ironic/tests/unit/common/test_rpc_service.py b/ironic/tests/unit/common/test_rpc_service.py
index 4e190f5e6..8483bfb22 100644
--- a/ironic/tests/unit/common/test_rpc_service.py
+++ b/ironic/tests/unit/common/test_rpc_service.py
@@ -46,6 +46,8 @@ class TestRPCService(base.TestCase):
mock_rpc, mock_ios, mock_target, mock_prepare_method):
mock_rpc.return_value.start = mock.MagicMock()
self.rpc_svc.handle_signal = mock.MagicMock()
+ self.assertFalse(self.rpc_svc._started)
+ self.assertFalse(self.rpc_svc._failure)
self.rpc_svc.start()
mock_ctx.assert_called_once_with()
mock_target.assert_called_once_with(topic=self.rpc_svc.topic,
@@ -55,6 +57,9 @@ class TestRPCService(base.TestCase):
mock_init_method.assert_called_once_with(self.rpc_svc.manager,
mock_ctx.return_value)
self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager)
+ self.assertTrue(self.rpc_svc._started)
+ self.assertFalse(self.rpc_svc._failure)
+ self.rpc_svc.wait_for_start() # should be no-op
@mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True)
@mock.patch.object(oslo_messaging, 'Target', autospec=True)
@@ -77,3 +82,29 @@ class TestRPCService(base.TestCase):
mock_init_method.assert_called_once_with(self.rpc_svc.manager,
mock_ctx.return_value)
self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager)
+
+ @mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True)
+ @mock.patch.object(oslo_messaging, 'Target', autospec=True)
+ @mock.patch.object(objects_base, 'IronicObjectSerializer', autospec=True)
+ @mock.patch.object(rpc, 'get_server', autospec=True)
+ @mock.patch.object(manager.ConductorManager, 'init_host', autospec=True)
+ @mock.patch.object(context, 'get_admin_context', autospec=True)
+ def test_start_failure(self, mock_ctx, mock_init_method, mock_rpc,
+ mock_ios, mock_target, mock_prepare_method):
+ mock_rpc.return_value.start = mock.MagicMock()
+ self.rpc_svc.handle_signal = mock.MagicMock()
+ mock_init_method.side_effect = RuntimeError("boom")
+ self.assertFalse(self.rpc_svc._started)
+ self.assertFalse(self.rpc_svc._failure)
+ self.assertRaises(RuntimeError, self.rpc_svc.start)
+ mock_ctx.assert_called_once_with()
+ mock_target.assert_called_once_with(topic=self.rpc_svc.topic,
+ server="fake_host")
+ mock_ios.assert_called_once_with(is_server=True)
+ mock_prepare_method.assert_called_once_with(self.rpc_svc.manager)
+ mock_init_method.assert_called_once_with(self.rpc_svc.manager,
+ mock_ctx.return_value)
+ self.assertIsNone(rpc.GLOBAL_MANAGER)
+ self.assertFalse(self.rpc_svc._started)
+ self.assertIn("boom", self.rpc_svc._failure)
+ self.assertRaises(SystemExit, self.rpc_svc.wait_for_start)
diff --git a/releasenotes/notes/service-wait-e85cbe7978f61764.yaml b/releasenotes/notes/service-wait-e85cbe7978f61764.yaml
new file mode 100644
index 000000000..c2fff601d
--- /dev/null
+++ b/releasenotes/notes/service-wait-e85cbe7978f61764.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+ - |
+ The ``ironic`` and ``ironic-conductor`` services now wait for the conductor
+ manager to start before notifying systemd about the successful start-up.