summary | refs | log | tree | commit | diff
path: root/ironic/common/rpc_service.py
diff options
context:
space:
mode:
author: Steve Baker <sbaker@redhat.com> 2023-02-27 11:05:10 +1300
committer: Steve Baker <sbaker@redhat.com> 2023-02-27 11:10:31 +1300
commit: 6a9e319fbeb0851c51bb14b9c4c3c5fa4685b14d (patch)
tree: 7505771b8835a033e60c08c39827f03011f3d682 /ironic/common/rpc_service.py
parent: e54ee2ba4cb818e25c75fcdc69f7ff1dc4956c73 (diff)
download: ironic-6a9e319fbeb0851c51bb14b9c4c3c5fa4685b14d.tar.gz
On rpc service stop, wait for node reservation release
Instead of clearing existing reservations at the beginning of del_host, wait for the tasks holding them to go to completion. This check continues indefinitely until the conductor process exits due to one of:

- All reservations for this conductor are released
- CONF.graceful_shutdown_timeout has elapsed
- The process manager (systemd, kubernetes) sends SIGKILL after the configured graceful period

Because the default values of [DEFAULT]graceful_shutdown_timeout and [conductor]heartbeat_timeout are the same (60s), no other conductor will claim a node as an orphan until this conductor exits.

Change-Id: Ib8db915746228cd87272740825aaaea1fdf953c7
Diffstat (limited to 'ironic/common/rpc_service.py')
-rw-r--r-- ironic/common/rpc_service.py 18
1 files changed, 17 insertions, 1 deletions
diff --git a/ironic/common/rpc_service.py b/ironic/common/rpc_service.py
index cb0f23c98..a74f6bab3 100644
--- a/ironic/common/rpc_service.py
+++ b/ironic/common/rpc_service.py
@@ -100,7 +100,8 @@ class RPCService(service.Service):
seconds=CONF.hash_ring_reset_interval)
try:
- self.manager.del_host(deregister=self.deregister)
+ self.manager.del_host(deregister=self.deregister,
+ clear_node_reservations=False)
except Exception as e:
LOG.exception('Service error occurred when cleaning up '
'the RPC manager. Error: %s', e)
@@ -127,6 +128,21 @@ class RPCService(service.Service):
LOG.info('Stopped RPC server for service %(service)s on host '
'%(host)s.',
{'service': self.topic, 'host': self.host})
+
+ # Wait for reservation locks held by this conductor.
+ # The conductor process will end when:
+ # - All reservations for this conductor are released
+ # - CONF.graceful_shutdown_timeout has elapsed
+ # - The process manager (systemd, kubernetes) sends SIGKILL after the
+ # configured graceful period
+ graceful_time = initial_time + datetime.timedelta(
+ seconds=CONF.graceful_shutdown_timeout)
+ while (self.manager.has_reserved()
+ and graceful_time > timeutils.utcnow()):
+ LOG.info('Waiting for reserved nodes to clear on host %(host)s',
+ {'host': self.host})
+ time.sleep(1)
+
rpc.set_global_manager(None)
def _handle_signal(self, signo, frame):