summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorBrian Coca <bcoca@users.noreply.github.com>2023-04-26 11:14:39 -0400
committerGitHub <noreply@github.com>2023-04-26 11:14:39 -0400
commitb2c00957224ab7324add12109d6f2be1bc41b55f (patch)
tree84957cd543611bb3f823152da63de7596541d8d8 /lib
parent1e8b889b932f834aa62e20c4d412418a3f97a70c (diff)
downloadansible-b2c00957224ab7324add12109d6f2be1bc41b55f.tar.gz
Fix gather parallel (#80597)
* fix meaning of parallel in gather_facts * Update docs with note about parallel not always being faster * add 'smarter' usage of gahter_timeout for parallel tasks * restore async when needed, not always * added typing * parallelism tests
Diffstat (limited to 'lib')
-rw-r--r--lib/ansible/modules/gather_facts.py8
-rw-r--r--lib/ansible/plugins/action/gather_facts.py46
2 files changed, 41 insertions, 13 deletions
diff --git a/lib/ansible/modules/gather_facts.py b/lib/ansible/modules/gather_facts.py
index b099cd87d1..aa67a0e3fd 100644
--- a/lib/ansible/modules/gather_facts.py
+++ b/lib/ansible/modules/gather_facts.py
@@ -26,13 +26,15 @@ options:
- A toggle that controls if the fact modules are executed in parallel or serially and in order.
This can guarantee the merge order of module facts at the expense of performance.
- By default it will be true if more than one fact module is used.
+ - For low cost/delay fact modules parallelism overhead might end up meaning the whole process takes longer.
+ Test your specific case to see if it is a speed improvement or not.
type: bool
attributes:
action:
support: full
async:
- details: multiple modules can be executed in parallel or serially, but the action itself will not be async
- support: partial
+ details: while this action does not support the task 'async' keywords it can do its own parallel processing using the C(parallel) option.
+ support: none
bypass_host_loop:
support: none
check_mode:
@@ -48,6 +50,8 @@ attributes:
notes:
- This is mostly a wrapper around other fact gathering modules.
- Options passed into this action must be supported by all the underlying fact modules configured.
+ - If using C(gather_timeout) and parallel execution, it will limit the total execution time of
+ modules that do not accept C(gather_timeout) themselves.
- Facts returned by each module will be merged, conflicts will favor 'last merged'.
Order is not guaranteed, when doing parallel gathering on multiple modules.
author:
diff --git a/lib/ansible/plugins/action/gather_facts.py b/lib/ansible/plugins/action/gather_facts.py
index 3ff7beb5a8..11cdcc39cd 100644
--- a/lib/ansible/plugins/action/gather_facts.py
+++ b/lib/ansible/plugins/action/gather_facts.py
@@ -6,6 +6,7 @@ __metaclass__ = type
import os
import time
+import typing as t
from ansible import constants as C
from ansible.executor.module_common import get_action_args_with_defaults
@@ -16,12 +17,13 @@ from ansible.utils.vars import merge_hash
class ActionModule(ActionBase):
- def _get_module_args(self, fact_module, task_vars):
+ def _get_module_args(self, fact_module: str, task_vars: dict[str, t.Any]) -> dict[str, t.Any]:
mod_args = self._task.args.copy()
# deal with 'setup specific arguments'
if fact_module not in C._ACTION_SETUP:
+
# TODO: remove in favor of controller side argspec detecing valid arguments
# network facts modules must support gather_subset
try:
@@ -30,16 +32,16 @@ class ActionModule(ActionBase):
name = self._connection._load_name.split('.')[-1]
if name not in ('network_cli', 'httpapi', 'netconf'):
subset = mod_args.pop('gather_subset', None)
- if subset not in ('all', ['all']):
- self._display.warning('Ignoring subset(%s) for %s' % (subset, fact_module))
+ if subset not in ('all', ['all'], None):
+ self._display.warning('Not passing subset(%s) to %s' % (subset, fact_module))
timeout = mod_args.pop('gather_timeout', None)
if timeout is not None:
- self._display.warning('Ignoring timeout(%s) for %s' % (timeout, fact_module))
+ self._display.warning('Not passing timeout(%s) to %s' % (timeout, fact_module))
fact_filter = mod_args.pop('filter', None)
if fact_filter is not None:
- self._display.warning('Ignoring filter(%s) for %s' % (fact_filter, fact_module))
+ self._display.warning('Not passing filter(%s) to %s' % (fact_filter, fact_module))
# Strip out keys with ``None`` values, effectively mimicking ``omit`` behavior
# This ensures we don't pass a ``None`` value as an argument expecting a specific type
@@ -57,7 +59,7 @@ class ActionModule(ActionBase):
return mod_args
- def _combine_task_result(self, result, task_result):
+ def _combine_task_result(self, result: dict[str, t.Any], task_result: dict[str, t.Any]) -> dict[str, t.Any]:
filtered_res = {
'ansible_facts': task_result.get('ansible_facts', {}),
'warnings': task_result.get('warnings', []),
@@ -67,7 +69,7 @@ class ActionModule(ActionBase):
# on conflict the last plugin processed wins, but try to do deep merge and append to lists.
return merge_hash(result, filtered_res, list_merge='append_rp')
- def run(self, tmp=None, task_vars=None):
+ def run(self, tmp: t.Optional[str] = None, task_vars: t.Optional[dict[str, t.Any]] = None) -> dict[str, t.Any]:
self._supports_check_mode = True
@@ -87,16 +89,23 @@ class ActionModule(ActionBase):
failed = {}
skipped = {}
- if parallel is None and len(modules) >= 1:
- parallel = True
+ if parallel is None:
+ if len(modules) > 1:
+ parallel = True
+ else:
+ parallel = False
else:
parallel = boolean(parallel)
- if parallel:
+ timeout = self._task.args.get('gather_timeout', None)
+ async_val = self._task.async_val
+
+ if not parallel:
# serially execute each module
for fact_module in modules:
# just one module, no need for fancy async
mod_args = self._get_module_args(fact_module, task_vars)
+ # TODO: use gather_timeout to cut module execution if module itself does not support gather_timeout
res = self._execute_module(module_name=fact_module, module_args=mod_args, task_vars=task_vars, wrap_async=False)
if res.get('failed', False):
failed[fact_module] = res
@@ -107,10 +116,21 @@ class ActionModule(ActionBase):
self._remove_tmp_path(self._connection._shell.tmpdir)
else:
- # do it async
+ # do it async, aka parallel
jobs = {}
+
for fact_module in modules:
mod_args = self._get_module_args(fact_module, task_vars)
+
+ # if module does not handle timeout, use timeout to handle module, hijack async_val as this is what async_wrapper uses
+ # TODO: make this action compain about async/async settings, use parallel option instead .. or remove parallel in favor of async settings?
+ if timeout and 'gather_timeout' not in mod_args:
+ self._task.async_val = int(timeout)
+ elif async_val != 0:
+ self._task.async_val = async_val
+ else:
+ self._task.async_val = 0
+
self._display.vvvv("Running %s" % fact_module)
jobs[fact_module] = (self._execute_module(module_name=fact_module, module_args=mod_args, task_vars=task_vars, wrap_async=True))
@@ -132,6 +152,10 @@ class ActionModule(ActionBase):
else:
time.sleep(0.5)
+ # restore value for post processing
+ if self._task.async_val != async_val:
+ self._task.async_val = async_val
+
if skipped:
result['msg'] = "The following modules were skipped: %s\n" % (', '.join(skipped.keys()))
result['skipped_modules'] = skipped