summaryrefslogtreecommitdiff
path: root/ironic/conf/conductor.py
diff options
context:
space:
mode:
author: Julia Kreger <juliaashleykreger@gmail.com> 2021-08-02 16:07:46 -0700
committer: Julia Kreger <juliaashleykreger@gmail.com> 2021-09-10 14:47:27 -0700
commit: d17749249cbe8507c39eb213e5e97aa1fb543a55 (patch)
tree: b1ae17fd632e65e9710566f3ab1dffe6335be46d /ironic/conf/conductor.py
parent: fbaad948d870ffd18995f5494016798c8d3c9206 (diff)
download: ironic-d17749249cbe8507c39eb213e5e97aa1fb543a55.tar.gz
Record node history and manage events in db
* Adds periodic task to purge node_history entries based upon provided configuration.
* Adds recording of node history entries for errors in the core conductor code.
* Also changes the rescue abort behavior to stop the notice from being recorded as an error, as this is a likely bug in behavior for any process or service evaluating the node last_error field.
* Makes use of a semi-free-form event_type field to help provide some additional context into what is going on and why. For example, if deployments are repeatedly failing, then perhaps it is a configuration issue, as opposed to a general failure. If a conductor has no resources, then the failure, in theory, would point back to the conductor itself.

Story: 2002980
Task: 42960
Change-Id: Ibfa8ac4878cacd98a43dd4424f6d53021ad91166
Diffstat (limited to 'ironic/conf/conductor.py')
-rw-r--r-- ironic/conf/conductor.py 51
1 files changed, 51 insertions, 0 deletions
diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py
index cb52c45d4..61d6b3247 100644
--- a/ironic/conf/conductor.py
+++ b/ironic/conf/conductor.py
@@ -283,6 +283,57 @@ opts = [
'will not run during cleaning. If unset for an '
'inband clean step, will use the priority set in the '
'ramdisk.')),
+ cfg.BoolOpt('node_history',
+ default=True,
+ mutable=True,
+ help=_('Boolean value, default True, if node event history '
+ 'is to be recorded. Errors and other noteworthy '
+ 'events in relation to a node are journaled to a '
+ 'database table which incurs some additional load. '
+ 'A periodic task does periodically remove entries '
+ 'from the database. Please note, if this is disabled, '
+ 'the conductor will continue to purge entries as '
+ 'long as [conductor]node_history_cleanup_batch_count '
+ 'is not 0.')),
+ cfg.IntOpt('node_history_max_entries',
+ default=300,
+ min=0,
+ mutable=True,
+ help=_('Maximum number of history entries which will be stored '
+ 'in the database per node. Default is 300. This setting '
+ 'excludes the minimum number of days retained using the '
+ '[conductor]node_history_minimum_days setting.')),
+ cfg.IntOpt('node_history_cleanup_interval',
+ min=0,
+ default=86400,
+ mutable=False,
+ help=_('Interval in seconds at which node history entries '
+ 'can be cleaned up in the database. Setting to 0 '
+ 'disables the periodic task. Defaults to once a day, '
+ 'or 86400 seconds.')),
+ cfg.IntOpt('node_history_cleanup_batch_count',
+ min=0,
+ default=1000,
+ mutable=False,
+ help=_('The target number of node history records to purge '
+ 'from the database when performing clean-up. '
+ 'Deletes are performed by node, and a node with excess '
+ 'records for a node will still be deleted. '
+ 'Defaults to 1000. Operators who find node history '
+ 'building up may wish to '
+ 'lower this threshold and decrease the time between '
+ 'cleanup operations using the '
+ '``node_history_cleanup_interval`` setting.')),
+ cfg.IntOpt('node_history_minimum_days',
+ min=0,
+ default=0,
+ mutable=True,
+ help=_('The minimum number of days to explicitly keep on '
+ 'hand in the database history entries for nodes. '
+ 'This is exclusive from the [conductor]'
+ 'node_history_max_entries setting as users of '
+ 'this setting are anticipated to need to retain '
+ 'history by policy.')),
]