diff options
author | Zuul <zuul@review.opendev.org> | 2021-09-16 16:51:54 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2021-09-16 16:51:54 +0000 |
commit | 3699365a8169b05f2e5b1dfb412a038fa756e328 (patch) | |
tree | d5c359e89b4ac9daff16bf1e5746bb595ce99f91 | |
parent | 14985866f49e40e795c8d20823b8c202fcc09358 (diff) | |
parent | e8c06e206c532504474692171e15484496e5a68d (diff) | |
download | ironic-3699365a8169b05f2e5b1dfb412a038fa756e328.tar.gz |
Merge "Fix driver task pattern to reduce periodic db load" into stable/wallaby
-rw-r--r-- | ironic/drivers/modules/drac/bios.py | 8 | ||||
-rw-r--r-- | ironic/drivers/modules/drac/management.py | 15 | ||||
-rw-r--r-- | ironic/drivers/modules/drac/raid.py | 12 | ||||
-rw-r--r-- | ironic/drivers/modules/irmc/raid.py | 9 | ||||
-rw-r--r-- | ironic/drivers/modules/redfish/management.py | 27 | ||||
-rw-r--r-- | ironic/drivers/modules/redfish/raid.py | 27 | ||||
-rw-r--r-- | releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml | 8 |
7 files changed, 76 insertions, 30 deletions
diff --git a/ironic/drivers/modules/drac/bios.py b/ironic/drivers/modules/drac/bios.py index b6f59db61..2ec6660a9 100644 --- a/ironic/drivers/modules/drac/bios.py +++ b/ironic/drivers/modules/drac/bios.py @@ -201,6 +201,14 @@ class DracWSManBIOS(base.BIOSInterface): for (node_uuid, driver, conductor_group, driver_internal_info) in node_list: try: + # NOTE(TheJulia): Evaluate if work is actually required before + # creating a task for every node in the deployment which does + # not have a lock and is not in maintenance mode. + if (not driver_internal_info.get("bios_config_job_ids") + and not driver_internal_info.get( + "factory_reset_time_before_reboot")): + continue + lock_purpose = 'checking async bios configuration jobs' # Performing read-only/non-destructive work with shared lock with task_manager.acquire(context, node_uuid, diff --git a/ironic/drivers/modules/drac/management.py b/ironic/drivers/modules/drac/management.py index 006357a08..a1f39818b 100644 --- a/ironic/drivers/modules/drac/management.py +++ b/ironic/drivers/modules/drac/management.py @@ -478,6 +478,17 @@ class DracRedfishManagement(redfish_management.RedfishManagement): for (node_uuid, driver, conductor_group, driver_internal_info) in node_list: try: + + task_monitor_url = driver_internal_info.get( + 'import_task_monitor_url') + # NOTE(TheJulia): Evaluate if a task monitor URL exists + # based upon our initial DB query before pulling a task for + # every node in the deployment which reduces the overall + # number of DB queries triggering in the background where + # no work is required. 
+ if not task_monitor_url: + continue + lock_purpose = 'checking async import configuration task' with task_manager.acquire(context, node_uuid, purpose=lock_purpose, @@ -485,10 +496,6 @@ class DracRedfishManagement(redfish_management.RedfishManagement): if not isinstance(task.driver.management, DracRedfishManagement): continue - task_monitor_url = driver_internal_info.get( - 'import_task_monitor_url') - if not task_monitor_url: - continue self._check_import_configuration_task( task, task_monitor_url) except exception.NodeNotFound: diff --git a/ironic/drivers/modules/drac/raid.py b/ironic/drivers/modules/drac/raid.py index b42b5b21f..4bb41c1f6 100644 --- a/ironic/drivers/modules/drac/raid.py +++ b/ironic/drivers/modules/drac/raid.py @@ -1482,6 +1482,14 @@ class DracWSManRAID(base.RAIDInterface): for (node_uuid, driver, conductor_group, driver_internal_info) in node_list: try: + + job_ids = driver_internal_info.get('raid_config_job_ids') + # NOTE(TheJulia): Evaluate if there is work to be done + # based upon the original DB query's results so we don't + # proceed creating tasks for every node in the deployment. 
+ if not job_ids: + continue + lock_purpose = 'checking async raid configuration jobs' with task_manager.acquire(context, node_uuid, purpose=lock_purpose, @@ -1489,10 +1497,6 @@ class DracWSManRAID(base.RAIDInterface): if not isinstance(task.driver.raid, DracWSManRAID): continue - job_ids = driver_internal_info.get('raid_config_job_ids') - if not job_ids: - continue - self._check_node_raid_jobs(task) except exception.NodeNotFound: diff --git a/ironic/drivers/modules/irmc/raid.py b/ironic/drivers/modules/irmc/raid.py index 901695632..34d1c3f38 100644 --- a/ironic/drivers/modules/irmc/raid.py +++ b/ironic/drivers/modules/irmc/raid.py @@ -434,6 +434,13 @@ class IRMCRAID(base.RAIDInterface): node_list = manager.iter_nodes(fields=fields, filters=filters) for (node_uuid, driver, conductor_group, raid_config) in node_list: try: + # NOTE(TheJulia): Evaluate based upon presence of raid + # configuration before triggering a task, as opposed to after + # so we don't create excess node task objects with related + # DB queries. 
+ if not raid_config or raid_config.get('fgi_status'): + continue + lock_purpose = 'checking async RAID configuration tasks' with task_manager.acquire(context, node_uuid, purpose=lock_purpose, @@ -444,8 +451,6 @@ class IRMCRAID(base.RAIDInterface): continue if task.node.target_raid_config is None: continue - if not raid_config or raid_config.get('fgi_status'): - continue task.upgrade_lock() if node.provision_state != states.CLEANWAIT: continue diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py index 7af5273d1..3c52b4051 100644 --- a/ironic/drivers/modules/redfish/management.py +++ b/ironic/drivers/modules/redfish/management.py @@ -872,6 +872,15 @@ class RedfishManagement(base.ManagementInterface): for (node_uuid, driver, conductor_group, driver_internal_info) in node_list: try: + firmware_updates = driver_internal_info.get( + 'firmware_updates') + # NOTE(TheJulia): If we don't have an entry upfront, we can + # safely skip past the node as we know work here is not + # required, otherwise minimizing the number of potential + # nodes to visit. + if not firmware_updates: + continue + lock_purpose = 'checking async firmware update failed.' with task_manager.acquire(context, node_uuid, purpose=lock_purpose, @@ -880,11 +889,6 @@ class RedfishManagement(base.ManagementInterface): RedfishManagement): continue - firmware_updates = driver_internal_info.get( - 'firmware_updates') - if not firmware_updates: - continue - node = task.node # A firmware update failed. Discard any remaining firmware @@ -921,6 +925,14 @@ class RedfishManagement(base.ManagementInterface): for (node_uuid, driver, conductor_group, driver_internal_info) in node_list: try: + firmware_updates = driver_internal_info.get( + 'firmware_updates') + # NOTE(TheJulia): Check and skip upfront before creating a + # task so we don't generate additional tasks and db queries + # for every node in CLEANWAIT which is not locked. 
+ if not firmware_updates: + continue + lock_purpose = 'checking async firmware update tasks.' with task_manager.acquire(context, node_uuid, purpose=lock_purpose, @@ -929,11 +941,6 @@ class RedfishManagement(base.ManagementInterface): RedfishManagement): continue - firmware_updates = driver_internal_info.get( - 'firmware_updates') - if not firmware_updates: - continue - self._check_node_firmware_update(task) except exception.NodeNotFound: diff --git a/ironic/drivers/modules/redfish/raid.py b/ironic/drivers/modules/redfish/raid.py index 4a7201a1f..1afc867c3 100644 --- a/ironic/drivers/modules/redfish/raid.py +++ b/ironic/drivers/modules/redfish/raid.py @@ -1033,6 +1033,15 @@ class RedfishRAID(base.RAIDInterface): for (node_uuid, driver, conductor_group, driver_internal_info) in node_list: try: + raid_configs = driver_internal_info.get( + 'raid_configs') + # NOTE(TheJulia): Evaluate the presence of raid configuration + # activity before pulling the task, so we don't needlessly + # create database queries with tasks which would be skipped + # anyhow. + if not raid_configs: + continue + lock_purpose = 'checking async RAID config failed.' with task_manager.acquire(context, node_uuid, purpose=lock_purpose, @@ -1040,11 +1049,6 @@ class RedfishRAID(base.RAIDInterface): if not isinstance(task.driver.raid, RedfishRAID): continue - raid_configs = driver_internal_info.get( - 'raid_configs') - if not raid_configs: - continue - node = task.node # A RAID config failed. Discard any remaining RAID @@ -1081,6 +1085,14 @@ class RedfishRAID(base.RAIDInterface): for (node_uuid, driver, conductor_group, driver_internal_info) in node_list: try: + raid_configs = driver_internal_info.get( + 'raid_configs') + # NOTE(TheJulia): Skip to next record if we do not + # have raid configuration tasks, so we don't pull tasks + # for every unrelated node in CLEANWAIT. + if not raid_configs: + continue + lock_purpose = 'checking async RAID config tasks.' 
with task_manager.acquire(context, node_uuid, purpose=lock_purpose, @@ -1088,11 +1100,6 @@ class RedfishRAID(base.RAIDInterface): if not isinstance(task.driver.raid, RedfishRAID): continue - raid_configs = driver_internal_info.get( - 'raid_configs') - if not raid_configs: - continue - self._check_node_raid_config(task) except exception.NodeNotFound: diff --git a/releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml b/releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml new file mode 100644 index 000000000..8f5c2b192 --- /dev/null +++ b/releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes the pattern of execution for periodic tasks such that the majority + of drivers now evaluate *if* work needs to be performed in advance of + creating a node task. Depending on the individual driver query pattern, + this prevents excess database queries from being triggered with every + task execution. |