diff options
author | Dmitry Tantsur <dtantsur@protonmail.com> | 2020-09-28 14:37:47 +0200 |
---|---|---|
committer | Dmitry Tantsur <dtantsur@protonmail.com> | 2020-09-28 17:53:46 +0200 |
commit | 0c3f52ec9a12c5854d326bc95fab78e6e87b4dcb (patch) | |
tree | e546947f4ae946fd3791ad1a5ade74e5217bb92a /ironic/conductor/utils.py | |
parent | ac19e6050d6347c51841d09b2e84fb2833b84aa5 (diff) | |
download | ironic-0c3f52ec9a12c5854d326bc95fab78e6e87b4dcb.tar.gz |
Do not silently ignore exceptions when running next steps
Currently if do_next_deploy/clean_step fails, the failure is swallowed
by eventlet since they're run in a new thread. This 1) is incorrect,
2) leads to nodes stuck in DEPLOYING/CLEANING.
Also update logging in agent_base to make similar problems easier to
spot.
Change-Id: I0282c9e06c54a173efc666cd8df25cf573afb394
Diffstat (limited to 'ironic/conductor/utils.py')
-rw-r--r-- | ironic/conductor/utils.py | 16 |
1 files changed, 16 insertions, 0 deletions
```diff
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 26c590f4d..8c5dd8e3f 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -16,6 +16,7 @@ import contextlib
 import crypt
 import datetime
 from distutils.version import StrictVersion
+import functools
 import os
 import secrets
 import time
@@ -563,6 +564,21 @@ def deploying_error_handler(task, logmsg, errmsg=None, traceback=False,
     task.process_event('fail')
 
 
+def fail_on_error(error_callback, msg, *error_args, **error_kwargs):
+    """A decorator for failing operation on failure."""
+    def wrapper(func):
+        @functools.wraps(func)
+        def wrapped(task, *args, **kwargs):
+            try:
+                return func(task, *args, **kwargs)
+            except Exception as exc:
+                errmsg = "%s. %s: %s" % (msg, exc.__class__.__name__, exc)
+                error_callback(task, errmsg, *error_args, **error_kwargs)
+
+        return wrapped
+    return wrapper
+
+
 @task_manager.require_exclusive_lock
 def abort_on_conductor_take_over(task):
     """Set node's state when a task was aborted due to conductor take over.
```