summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2021-09-16 16:51:54 +0000
committerGerrit Code Review <review@openstack.org>2021-09-16 16:51:54 +0000
commit3699365a8169b05f2e5b1dfb412a038fa756e328 (patch)
treed5c359e89b4ac9daff16bf1e5746bb595ce99f91
parent14985866f49e40e795c8d20823b8c202fcc09358 (diff)
parente8c06e206c532504474692171e15484496e5a68d (diff)
downloadironic-3699365a8169b05f2e5b1dfb412a038fa756e328.tar.gz
Merge "Fix driver task pattern to reduce periodic db load" into stable/wallaby
-rw-r--r--ironic/drivers/modules/drac/bios.py8
-rw-r--r--ironic/drivers/modules/drac/management.py15
-rw-r--r--ironic/drivers/modules/drac/raid.py12
-rw-r--r--ironic/drivers/modules/irmc/raid.py9
-rw-r--r--ironic/drivers/modules/redfish/management.py27
-rw-r--r--ironic/drivers/modules/redfish/raid.py27
-rw-r--r--releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml8
7 files changed, 76 insertions, 30 deletions
diff --git a/ironic/drivers/modules/drac/bios.py b/ironic/drivers/modules/drac/bios.py
index b6f59db61..2ec6660a9 100644
--- a/ironic/drivers/modules/drac/bios.py
+++ b/ironic/drivers/modules/drac/bios.py
@@ -201,6 +201,14 @@ class DracWSManBIOS(base.BIOSInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
+ # NOTE(TheJulia): Evaluate if work is actually required before
+ # creating a task for every node in the deployment which does
+ # not have a lock and is not in maintenance mode.
+ if (not driver_internal_info.get("bios_config_job_ids")
+ and not driver_internal_info.get(
+ "factory_reset_time_before_reboot")):
+ continue
+
lock_purpose = 'checking async bios configuration jobs'
# Performing read-only/non-destructive work with shared lock
with task_manager.acquire(context, node_uuid,
diff --git a/ironic/drivers/modules/drac/management.py b/ironic/drivers/modules/drac/management.py
index 006357a08..a1f39818b 100644
--- a/ironic/drivers/modules/drac/management.py
+++ b/ironic/drivers/modules/drac/management.py
@@ -478,6 +478,17 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
+
+ task_monitor_url = driver_internal_info.get(
+ 'import_task_monitor_url')
+ # NOTE(TheJulia): Evaluate if a task monitor URL exists
+ # based upon our initial DB query before pulling a task for
+ # every node in the deployment which reduces the overall
+ # number of DB queries triggering in the background where
+ # no work is required.
+ if not task_monitor_url:
+ continue
+
lock_purpose = 'checking async import configuration task'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -485,10 +496,6 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
if not isinstance(task.driver.management,
DracRedfishManagement):
continue
- task_monitor_url = driver_internal_info.get(
- 'import_task_monitor_url')
- if not task_monitor_url:
- continue
self._check_import_configuration_task(
task, task_monitor_url)
except exception.NodeNotFound:
diff --git a/ironic/drivers/modules/drac/raid.py b/ironic/drivers/modules/drac/raid.py
index b42b5b21f..4bb41c1f6 100644
--- a/ironic/drivers/modules/drac/raid.py
+++ b/ironic/drivers/modules/drac/raid.py
@@ -1482,6 +1482,14 @@ class DracWSManRAID(base.RAIDInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
+
+ job_ids = driver_internal_info.get('raid_config_job_ids')
+ # NOTE(TheJulia): Evaluate if there is work to be done
+ # based upon the original DB query's results so we don't
+ # proceed creating tasks for every node in the deployment.
+ if not job_ids:
+ continue
+
lock_purpose = 'checking async raid configuration jobs'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -1489,10 +1497,6 @@ class DracWSManRAID(base.RAIDInterface):
if not isinstance(task.driver.raid, DracWSManRAID):
continue
- job_ids = driver_internal_info.get('raid_config_job_ids')
- if not job_ids:
- continue
-
self._check_node_raid_jobs(task)
except exception.NodeNotFound:
diff --git a/ironic/drivers/modules/irmc/raid.py b/ironic/drivers/modules/irmc/raid.py
index 901695632..34d1c3f38 100644
--- a/ironic/drivers/modules/irmc/raid.py
+++ b/ironic/drivers/modules/irmc/raid.py
@@ -434,6 +434,13 @@ class IRMCRAID(base.RAIDInterface):
node_list = manager.iter_nodes(fields=fields, filters=filters)
for (node_uuid, driver, conductor_group, raid_config) in node_list:
try:
+ # NOTE(TheJulia): Evaluate based upon presence of raid
+ # configuration before triggering a task, as opposed to after
+ # so we don't create excess node task objects with related
+ # DB queries.
+ if not raid_config or raid_config.get('fgi_status'):
+ continue
+
lock_purpose = 'checking async RAID configuration tasks'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -444,8 +451,6 @@ class IRMCRAID(base.RAIDInterface):
continue
if task.node.target_raid_config is None:
continue
- if not raid_config or raid_config.get('fgi_status'):
- continue
task.upgrade_lock()
if node.provision_state != states.CLEANWAIT:
continue
diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py
index 7af5273d1..3c52b4051 100644
--- a/ironic/drivers/modules/redfish/management.py
+++ b/ironic/drivers/modules/redfish/management.py
@@ -872,6 +872,15 @@ class RedfishManagement(base.ManagementInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
+ firmware_updates = driver_internal_info.get(
+ 'firmware_updates')
+ # NOTE(TheJulia): If we don't have an entry upfront, we can
+ # safely skip past the node as we know work here is not
+ # required, otherwise minimizing the number of potential
+ # nodes to visit.
+ if not firmware_updates:
+ continue
+
lock_purpose = 'checking async firmware update failed.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -880,11 +889,6 @@ class RedfishManagement(base.ManagementInterface):
RedfishManagement):
continue
- firmware_updates = driver_internal_info.get(
- 'firmware_updates')
- if not firmware_updates:
- continue
-
node = task.node
# A firmware update failed. Discard any remaining firmware
@@ -921,6 +925,14 @@ class RedfishManagement(base.ManagementInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
+ firmware_updates = driver_internal_info.get(
+ 'firmware_updates')
+ # NOTE(TheJulia): Check and skip upfront before creating a
+ # task so we don't generate additional tasks and db queries
+ # for every node in CLEANWAIT which is not locked.
+ if not firmware_updates:
+ continue
+
lock_purpose = 'checking async firmware update tasks.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -929,11 +941,6 @@ class RedfishManagement(base.ManagementInterface):
RedfishManagement):
continue
- firmware_updates = driver_internal_info.get(
- 'firmware_updates')
- if not firmware_updates:
- continue
-
self._check_node_firmware_update(task)
except exception.NodeNotFound:
diff --git a/ironic/drivers/modules/redfish/raid.py b/ironic/drivers/modules/redfish/raid.py
index 4a7201a1f..1afc867c3 100644
--- a/ironic/drivers/modules/redfish/raid.py
+++ b/ironic/drivers/modules/redfish/raid.py
@@ -1033,6 +1033,15 @@ class RedfishRAID(base.RAIDInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
+ raid_configs = driver_internal_info.get(
+ 'raid_configs')
+ # NOTE(TheJulia): Evaluate the presence of raid configuration
+ # activity before pulling the task, so we don't needlessly
+ # create database queries with tasks which would be skipped
+ # anyhow.
+ if not raid_configs:
+ continue
+
lock_purpose = 'checking async RAID config failed.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -1040,11 +1049,6 @@ class RedfishRAID(base.RAIDInterface):
if not isinstance(task.driver.raid, RedfishRAID):
continue
- raid_configs = driver_internal_info.get(
- 'raid_configs')
- if not raid_configs:
- continue
-
node = task.node
# A RAID config failed. Discard any remaining RAID
@@ -1081,6 +1085,14 @@ class RedfishRAID(base.RAIDInterface):
for (node_uuid, driver, conductor_group,
driver_internal_info) in node_list:
try:
+ raid_configs = driver_internal_info.get(
+ 'raid_configs')
+ # NOTE(TheJulia): Skip to next record if we do not
+ # have raid configuration tasks, so we don't pull tasks
+ # for every unrelated node in CLEANWAIT.
+ if not raid_configs:
+ continue
+
lock_purpose = 'checking async RAID config tasks.'
with task_manager.acquire(context, node_uuid,
purpose=lock_purpose,
@@ -1088,11 +1100,6 @@ class RedfishRAID(base.RAIDInterface):
if not isinstance(task.driver.raid, RedfishRAID):
continue
- raid_configs = driver_internal_info.get(
- 'raid_configs')
- if not raid_configs:
- continue
-
self._check_node_raid_config(task)
except exception.NodeNotFound:
diff --git a/releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml b/releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml
new file mode 100644
index 000000000..8f5c2b192
--- /dev/null
+++ b/releasenotes/notes/optimize-driver-task-pattern-322e02b6a2233919.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+ - |
+ Fixes the pattern of execution for periodic tasks such that the majority
+ of drivers now evaluate *if* work needs to be performed in advance of
+ creating a node task. Depending on the individual driver query pattern,
+ this prevents excess database queries from being triggered with every
+ task execution.