summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Martz <matt@sivel.net>2023-04-18 12:01:47 -0500
committerGitHub <noreply@github.com>2023-04-18 12:01:47 -0500
commitf90c7f1a7d38f317b3d22977f06ef1b15311a4a7 (patch)
tree09f8954d189f770624298e35164ba9a8df41f2c4
parent12083db2c535f2de0c50c5fb14a47e43c289f703 (diff)
downloadansible-f90c7f1a7d38f317b3d22977f06ef1b15311a4a7.tar.gz
[stable-2.15] Implement checks, and backwards compat change, to move forward with UTF-8 only (#80370) (#80545)
(cherry picked from commit 0ee7cfb)
-rw-r--r--changelogs/fragments/80258-defensive-display-non-utf8.yml4
-rw-r--r--docs/docsite/rst/dev_guide/developing_modules_best_practices.rst1
-rw-r--r--lib/ansible/config/base.yml11
-rw-r--r--lib/ansible/module_utils/common/text/converters.py11
-rw-r--r--lib/ansible/modules/command.py2
-rw-r--r--lib/ansible/modules/expect.py2
-rw-r--r--lib/ansible/modules/raw.py2
-rw-r--r--lib/ansible/modules/script.py2
-rw-r--r--lib/ansible/modules/shell.py2
-rw-r--r--lib/ansible/plugins/action/__init__.py24
-rw-r--r--lib/ansible/utils/display.py17
11 files changed, 70 insertions, 8 deletions
diff --git a/changelogs/fragments/80258-defensive-display-non-utf8.yml b/changelogs/fragments/80258-defensive-display-non-utf8.yml
new file mode 100644
index 0000000000..5e9ed076a8
--- /dev/null
+++ b/changelogs/fragments/80258-defensive-display-non-utf8.yml
@@ -0,0 +1,4 @@
+bugfixes:
+- Display - Defensively configure writing to stdout and stderr with a custom encoding error handler that will replace invalid characters
+ while providing a deprecation warning that non-utf8 text will result in an error in a future version.
+- module responses - Ensure that module responses are utf-8 adhereing to JSON RFC and expectations of the core code.
diff --git a/docs/docsite/rst/dev_guide/developing_modules_best_practices.rst b/docs/docsite/rst/dev_guide/developing_modules_best_practices.rst
index e9cfb6e845..20861ab58e 100644
--- a/docs/docsite/rst/dev_guide/developing_modules_best_practices.rst
+++ b/docs/docsite/rst/dev_guide/developing_modules_best_practices.rst
@@ -124,6 +124,7 @@ Creating correct and informative module output
Modules must output valid JSON only. Follow these guidelines for creating correct, useful module output:
+* Module return data must be encoded as strict UTF-8. Modules that cannot return UTF-8 encoded data should return the data encoded by something such as base64. Optionally modules can make the determination if they can encode as UTF-8 and utilize ``errors='replace'`` to replace non UTF-8 characters making the return values lossy.
* Make your top-level return type a hash (dictionary).
* Nest complex return values within the top-level hash.
* Incorporate any lists or simple scalar values within the top-level return hash.
diff --git a/lib/ansible/config/base.yml b/lib/ansible/config/base.yml
index 6449ed8b8c..052a8f0834 100644
--- a/lib/ansible/config/base.yml
+++ b/lib/ansible/config/base.yml
@@ -1752,6 +1752,17 @@ MODULE_IGNORE_EXTS:
ini:
- {key: module_ignore_exts, section: defaults}
type: list
+MODULE_STRICT_UTF8_RESPONSE:
+ name: Module strict UTF-8 response
+ description:
+ - Enables whether module responses are evaluated for containing non UTF-8 data
+ - Disabling this may result in unexpected behavior
+ - Only ansible-core should evaluate this configuration
+ env: [{name: ANSIBLE_MODULE_STRICT_UTF8_RESPONSE}]
+ ini:
+ - {key: module_strict_utf8_response, section: defaults}
+ type: bool
+ default: True
OLD_PLUGIN_CACHE_CLEARING:
description: Previously Ansible would only clear some of the plugin loading caches when loading new roles, this led to some behaviours in which a plugin loaded in previous plays would be unexpectedly 'sticky'. This setting allows to return to that behaviour.
env: [{name: ANSIBLE_OLD_PLUGIN_CACHE_CLEAR}]
diff --git a/lib/ansible/module_utils/common/text/converters.py b/lib/ansible/module_utils/common/text/converters.py
index dcfb89a8d7..f33e76cd68 100644
--- a/lib/ansible/module_utils/common/text/converters.py
+++ b/lib/ansible/module_utils/common/text/converters.py
@@ -268,18 +268,13 @@ def _json_encode_fallback(obj):
def jsonify(data, **kwargs):
+ # After 2.18, we should remove this loop, and hardcode to utf-8 in alignment with requiring utf-8 module responses
for encoding in ("utf-8", "latin-1"):
try:
- return json.dumps(data, encoding=encoding, default=_json_encode_fallback, **kwargs)
- # Old systems using old simplejson module does not support encoding keyword.
- except TypeError:
- try:
- new_data = container_to_text(data, encoding=encoding)
- except UnicodeDecodeError:
- continue
- return json.dumps(new_data, default=_json_encode_fallback, **kwargs)
+ new_data = container_to_text(data, encoding=encoding)
except UnicodeDecodeError:
continue
+ return json.dumps(new_data, default=_json_encode_fallback, **kwargs)
raise UnicodeError('Invalid unicode encoding encountered')
diff --git a/lib/ansible/modules/command.py b/lib/ansible/modules/command.py
index 490c0ca5a9..a8d6bf4ebe 100644
--- a/lib/ansible/modules/command.py
+++ b/lib/ansible/modules/command.py
@@ -101,6 +101,8 @@ notes:
- The C(executable) parameter is removed since version 2.4. If you have a need for this parameter, use the M(ansible.builtin.shell) module instead.
- For Windows targets, use the M(ansible.windows.win_command) module instead.
- For rebooting systems, use the M(ansible.builtin.reboot) or M(ansible.windows.win_reboot) module.
+ - If the command returns non UTF-8 data, it must be encoded to avoid issues. This may necessitate using M(ansible.builtin.shell) so the output
+ can be piped through C(base64).
seealso:
- module: ansible.builtin.raw
- module: ansible.builtin.script
diff --git a/lib/ansible/modules/expect.py b/lib/ansible/modules/expect.py
index b8c9aa7ee4..9ca799dac7 100644
--- a/lib/ansible/modules/expect.py
+++ b/lib/ansible/modules/expect.py
@@ -81,6 +81,8 @@ notes:
- The M(ansible.builtin.expect) module is designed for simple scenarios.
For more complex needs, consider the use of expect code with the M(ansible.builtin.shell)
or M(ansible.builtin.script) modules. (An example is part of the M(ansible.builtin.shell) module documentation).
+ - If the command returns non UTF-8 data, it must be encoded to avoid issues. One option is to pipe
+ the output through C(base64).
seealso:
- module: ansible.builtin.script
- module: ansible.builtin.shell
diff --git a/lib/ansible/modules/raw.py b/lib/ansible/modules/raw.py
index dc40a7391c..60840d044a 100644
--- a/lib/ansible/modules/raw.py
+++ b/lib/ansible/modules/raw.py
@@ -39,6 +39,8 @@ description:
- This module does not require python on the remote system, much like
the M(ansible.builtin.script) module.
- This module is also supported for Windows targets.
+ - If the command returns non UTF-8 data, it must be encoded to avoid issues. One option is to pipe
+ the output through C(base64).
extends_documentation_fragment:
- action_common_attributes
- action_common_attributes.raw
diff --git a/lib/ansible/modules/script.py b/lib/ansible/modules/script.py
index 2cefc0a4bb..fa2eea601d 100644
--- a/lib/ansible/modules/script.py
+++ b/lib/ansible/modules/script.py
@@ -47,6 +47,8 @@ notes:
stderr is sent to stdout. If you depend on separated stdout and stderr result keys, please switch to a copy+command set of tasks instead of using script.
- If the path to the local script contains spaces, it needs to be quoted.
- This module is also supported for Windows targets.
+ - If the script returns non UTF-8 data, it must be encoded to avoid issues. One option is to pipe
+ the output through C(base64).
seealso:
- module: ansible.builtin.shell
- module: ansible.windows.win_shell
diff --git a/lib/ansible/modules/shell.py b/lib/ansible/modules/shell.py
index 52fda1b015..c4c359ec53 100644
--- a/lib/ansible/modules/shell.py
+++ b/lib/ansible/modules/shell.py
@@ -90,6 +90,8 @@ notes:
- An alternative to using inline shell scripts with this module is to use
the M(ansible.builtin.script) module possibly together with the M(ansible.builtin.template) module.
- For rebooting systems, use the M(ansible.builtin.reboot) or M(ansible.windows.win_reboot) module.
+ - If the command returns non UTF-8 data, it must be encoded to avoid issues. One option is to pipe
+ the output through C(base64).
seealso:
- module: ansible.builtin.command
- module: ansible.builtin.raw
diff --git a/lib/ansible/plugins/action/__init__.py b/lib/ansible/plugins/action/__init__.py
index 436fb65ae8..5ded3a8aa3 100644
--- a/lib/ansible/plugins/action/__init__.py
+++ b/lib/ansible/plugins/action/__init__.py
@@ -39,6 +39,18 @@ from ansible.utils.plugin_docs import get_versioned_doclink
display = Display()
+def _validate_utf8_json(d):
+ if isinstance(d, text_type):
+ # Purposefully not using to_bytes here for performance reasons
+ d.encode(encoding='utf-8', errors='strict')
+ elif isinstance(d, dict):
+ for o in d.items():
+ _validate_utf8_json(o)
+ elif isinstance(d, (list, tuple)):
+ for o in d:
+ _validate_utf8_json(o)
+
+
class ActionBase(ABC):
'''
@@ -1232,6 +1244,18 @@ class ActionBase(ABC):
display.warning(w)
data = json.loads(filtered_output)
+
+ if C.MODULE_STRICT_UTF8_RESPONSE and not data.pop('_ansible_trusted_utf8', None):
+ try:
+ _validate_utf8_json(data)
+ except UnicodeEncodeError:
+ # When removing this, also remove the loop and latin-1 from ansible.module_utils.common.text.converters.jsonify
+ display.deprecated(
+ f'Module "{self._task.resolved_action or self._task.action}" returned non UTF-8 data in '
+ 'the JSON response. This will become an error in the future',
+ version='2.18',
+ )
+
data['_ansible_parsed'] = True
except ValueError:
# not valid json, lets try to capture error
diff --git a/lib/ansible/utils/display.py b/lib/ansible/utils/display.py
index b4ff01c006..7f059a16a6 100644
--- a/lib/ansible/utils/display.py
+++ b/lib/ansible/utils/display.py
@@ -26,6 +26,7 @@ else:
# this will be set to False if curses.setupterm() fails
HAS_CURSES = True
+import codecs
import ctypes.util
import fcntl
import getpass
@@ -297,8 +298,24 @@ class Display(metaclass=Singleton):
except Exception as ex:
self.warning(f"failed to patch stdout/stderr for fork-safety: {ex}")
+ codecs.register_error('_replacing_warning_handler', self._replacing_warning_handler)
+ try:
+ sys.stdout.reconfigure(errors='_replacing_warning_handler')
+ sys.stderr.reconfigure(errors='_replacing_warning_handler')
+ except Exception as ex:
+ self.warning(f"failed to reconfigure stdout/stderr with custom encoding error handler: {ex}")
+
self.setup_curses = False
+ def _replacing_warning_handler(self, exception):
+ # TODO: This should probably be deferred until after the current display is completed
+ # this will require some amount of new functionality
+ self.deprecated(
+ 'Non UTF-8 encoded data replaced with "?" while displaying text to stdout/stderr, this is temporary and will become an error',
+ version='2.18',
+ )
+ return '?', exception.end
+
def set_queue(self, queue):
"""Set the _final_q on Display, so that we know to proxy display over the queue
instead of directly writing to stdout/stderr from forks