author     Adrian Likins <alikins@redhat.com>   2016-11-07 10:07:26 -0500
committer  GitHub <noreply@github.com>          2016-11-07 10:07:26 -0500
commit     dd0189839eaded82df9e2953600d97272871f2f8 (patch)
tree       0d4bd3b057c23b19ed21216fb84e7fea49a13234
parent     5aaf1d1a15bea3b968831c00d7a8347448ead06a (diff)
download   ansible-dd0189839eaded82df9e2953600d97272871f2f8.tar.gz
Fix bug (#18355) where encrypted inventories fail 18355 (#18373)
* Fix bug (#18355) where encrypted inventories fail

  This is the first part of the fix for #18355.

* Make DataLoader._get_file_contents return bytes

  The issue in #18355 was caused by a change that made the inventory code stop
  using _get_file_contents() so that it could handle text encoding itself and
  better tolerate harmless text encoding errors in ini files (invalid unicode
  text in comment fields).

  _get_file_contents() now returns a bytes object instead of a text object, so
  it and its other callers can handle the to_text() conversion themselves. The
  callers of _get_file_contents() have been updated to call to_text() on the
  result.

  Previously, the ini parser worked around ini files that may contain invalid
  unicode in comment lines by not using DataLoader._get_file_contents(), which
  handles decryption of vault-encrypted files, because _get_file_contents()
  called to_text() on the data it read. Since _get_file_contents() now returns
  a bytestring, ini.py can call it and still special-case ini file comments
  when converting with to_text(). That also means encrypted inventory files
  are decrypted first.

  Fixes #18355
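
In practical terms, every caller of _get_file_contents() now receives raw bytes and decides when to decode. A minimal sketch of the resulting caller pattern, assuming the default loader; the inventory path is illustrative only:

    from ansible.module_utils._text import to_text
    from ansible.parsing.dataloader import DataLoader

    loader = DataLoader()

    # _get_file_contents() now returns (bytes, show_content); vault-encrypted
    # files are decrypted before the bytes are handed back.
    b_data, show_content = loader._get_file_contents('/etc/ansible/hosts')  # path is illustrative

    # decoding is the caller's job now
    data = to_text(b_data, errors='surrogate_or_strict')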
-rw-r--r--   lib/ansible/inventory/ini.py                 25
-rw-r--r--   lib/ansible/parsing/dataloader.py             5
-rw-r--r--   lib/ansible/plugins/action/include_vars.py    6
-rw-r--r--   lib/ansible/plugins/lookup/file.py            4
-rw-r--r--   test/units/mock/loader.py                     6
-rw-r--r--   test/units/parsing/test_data_loader.py        6
6 files changed, 32 insertions, 20 deletions
diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py
index c609ad0e4e..aa54c3d55a 100644
--- a/lib/ansible/inventory/ini.py
+++ b/lib/ansible/inventory/ini.py
@@ -51,18 +51,21 @@ class InventoryParser(object):
# Read in the hosts, groups, and variables defined in the
# inventory file.
- with open(filename, 'rb') as fh:
- data = fh.read()
+ if loader:
+ (b_data, private) = loader._get_file_contents(filename)
+ else:
+ with open(filename, 'rb') as fh:
+ b_data = fh.read()
- try:
- # Faster to do to_text once on a long string than many
- # times on smaller strings
- data = to_text(data, errors='surrogate_or_strict')
- data = [line for line in data.splitlines() if not (line.startswith(u';') or line.startswith(u'#'))]
- except UnicodeError:
- # Skip comment lines here to avoid potential undecodable
- # errors in comments: https://github.com/ansible/ansible/issues/17593
- data = [to_text(line, errors='surrogate_or_strict') for line in data.splitlines() if not (line.startswith(b';') or line.startswith(b'#'))]
+ try:
+ # Faster to do to_text once on a long string than many
+ # times on smaller strings
+ data = to_text(b_data, errors='surrogate_or_strict')
+ data = [line for line in data.splitlines() if not (line.startswith(u';') or line.startswith(u'#'))]
+ except UnicodeError:
+ # Skip comment lines here to avoid potential undecodable
+ # errors in comments: https://github.com/ansible/ansible/issues/17593
+ data = [to_text(line, errors='surrogate_or_strict') for line in b_data.splitlines() if not (line.startswith(b';') or line.startswith(b'#'))]
self._parse(data)
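
For reference, the comment-skipping fallback in this hunk only changes where the filtering happens: on the happy path comments are dropped after decoding the whole buffer, and on UnicodeError they are dropped at the bytes level so an undecodable comment cannot break parsing (#17593). A standalone sketch of that behavior with made-up input bytes:

    from ansible.module_utils._text import to_text

    b_data = b"[web]\nhost1\n# comment with undecodable bytes: \xff\xfe\n"  # made-up example

    try:
        # fast path: decode the whole buffer once, then drop comment lines as text
        text = to_text(b_data, errors='surrogate_or_strict')
        lines = [line for line in text.splitlines()
                 if not (line.startswith(u';') or line.startswith(u'#'))]
    except UnicodeError:
        # fallback: drop comment lines as bytes, then decode the survivors one by one
        lines = [to_text(line, errors='surrogate_or_strict')
                 for line in b_data.splitlines()
                 if not (line.startswith(b';') or line.startswith(b'#'))]

    # either way, only the non-comment lines survive: [u'[web]', u'host1']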
diff --git a/lib/ansible/parsing/dataloader.py b/lib/ansible/parsing/dataloader.py
index a3ddd6e8fb..594475bb20 100644
--- a/lib/ansible/parsing/dataloader.py
+++ b/lib/ansible/parsing/dataloader.py
@@ -116,7 +116,9 @@ class DataLoader():
parsed_data = self._FILE_CACHE[file_name]
else:
# read the file contents and load the data structure from them
- (file_data, show_content) = self._get_file_contents(file_name)
+ (b_file_data, show_content) = self._get_file_contents(file_name)
+
+ file_data = to_text(b_file_data, errors='surrogate_or_strict')
parsed_data = self.load(data=file_data, file_name=file_name, show_content=show_content)
# cache the file contents for next time
@@ -178,7 +180,6 @@ class DataLoader():
data = self._vault.decrypt(data, filename=b_file_name)
show_content = False
- data = to_text(data, errors='surrogate_or_strict')
return (data, show_content)
except (IOError, OSError) as e:
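
The net effect of the dataloader hunk: _get_file_contents() reads bytes, decrypts them if the file is vault-encrypted, and returns them undecoded. A simplified, hypothetical sketch of that flow; the is_encrypted() check belongs to the unchanged lines above this hunk and is an assumption here, not part of the patch:

    from ansible.module_utils._text import to_bytes

    def get_file_contents_sketch(vault, file_name):
        # hypothetical standalone approximation of DataLoader._get_file_contents()
        b_file_name = to_bytes(file_name, errors='surrogate_or_strict')
        with open(b_file_name, 'rb') as f:
            data = f.read()

        show_content = True
        if vault.is_encrypted(data):                       # assumed VaultLib check
            data = vault.decrypt(data, filename=b_file_name)
            show_content = False

        # no to_text() here any more -- callers decode the bytes themselves
        return (data, show_content)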
diff --git a/lib/ansible/plugins/action/include_vars.py b/lib/ansible/plugins/action/include_vars.py
index ebe29b2f14..554c7d2c04 100644
--- a/lib/ansible/plugins/action/include_vars.py
+++ b/lib/ansible/plugins/action/include_vars.py
@@ -22,7 +22,7 @@ from os import path, walk
import re
from ansible.errors import AnsibleError
-from ansible.module_utils._text import to_native
+from ansible.module_utils._text import to_native, to_text
from ansible.plugins.action import ActionBase
@@ -245,7 +245,9 @@ class ActionModule(ActionBase):
)
return failed, err_msg, results
- data, show_content = self._loader._get_file_contents(filename)
+ b_data, show_content = self._loader._get_file_contents(filename)
+ data = to_text(b_data, errors='surrogate_or_strict')
+
self.show_content = show_content
data = self._loader.load(data, show_content)
if not data:
diff --git a/lib/ansible/plugins/lookup/file.py b/lib/ansible/plugins/lookup/file.py
index a14951f600..5b687e9bd5 100644
--- a/lib/ansible/plugins/lookup/file.py
+++ b/lib/ansible/plugins/lookup/file.py
@@ -19,6 +19,7 @@ __metaclass__ = type
from ansible.errors import AnsibleError, AnsibleParserError
from ansible.plugins.lookup import LookupBase
+from ansible.module_utils._text import to_text
try:
from __main__ import display
@@ -41,7 +42,8 @@ class LookupModule(LookupBase):
display.vvvv(u"File lookup using %s as file" % lookupfile)
try:
if lookupfile:
- contents, show_data = self._loader._get_file_contents(lookupfile)
+ b_contents, show_data = self._loader._get_file_contents(lookupfile)
+ contents = to_text(b_contents, errors='surrogate_or_strict')
ret.append(contents.rstrip())
else:
raise AnsibleParserError()
diff --git a/test/units/mock/loader.py b/test/units/mock/loader.py
index 2027e183c1..f991d2c910 100644
--- a/test/units/mock/loader.py
+++ b/test/units/mock/loader.py
@@ -23,6 +23,8 @@ import os
from ansible.errors import AnsibleParserError
from ansible.parsing.dataloader import DataLoader
+from ansible.module_utils._text import to_bytes
+
class DictDataLoader(DataLoader):
@@ -39,9 +41,11 @@ class DictDataLoader(DataLoader):
return self.load(self._file_mapping[path], path)
return None
+ # TODO: the real _get_file_contents returns a bytestring, so we actually convert the
+ # unicode/text it's created with to utf-8
def _get_file_contents(self, path):
if path in self._file_mapping:
- return (self._file_mapping[path], False)
+ return (to_bytes(self._file_mapping[path]), False)
else:
raise AnsibleParserError("file not found: %s" % path)
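
With the fake loader converting its stored text to bytes, test code that goes through _get_file_contents() sees the same type as with the real DataLoader. A small illustrative check; the test-tree import path and the file path are assumptions:

    from ansible.module_utils._text import to_text
    from units.mock.loader import DictDataLoader   # assumes the test tree is importable

    fake_loader = DictDataLoader({'/hypothetical/vars.yml': 'a: 1\n'})

    b_data, show_content = fake_loader._get_file_contents('/hypothetical/vars.yml')
    assert isinstance(b_data, bytes)               # bytes now, like the real loader
    assert to_text(b_data, errors='surrogate_or_strict') == u'a: 1\n'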
diff --git a/test/units/parsing/test_data_loader.py b/test/units/parsing/test_data_loader.py
index 0182d40d3e..b6ec8247e5 100644
--- a/test/units/parsing/test_data_loader.py
+++ b/test/units/parsing/test_data_loader.py
@@ -37,13 +37,13 @@ class TestDataLoader(unittest.TestCase):
@patch.object(DataLoader, '_get_file_contents')
def test_parse_json_from_file(self, mock_def):
- mock_def.return_value = ("""{"a": 1, "b": 2, "c": 3}""", True)
+ mock_def.return_value = (b"""{"a": 1, "b": 2, "c": 3}""", True)
output = self._loader.load_from_file('dummy_json.txt')
self.assertEqual(output, dict(a=1,b=2,c=3))
@patch.object(DataLoader, '_get_file_contents')
def test_parse_yaml_from_file(self, mock_def):
- mock_def.return_value = ("""
+ mock_def.return_value = (b"""
a: 1
b: 2
c: 3
@@ -53,7 +53,7 @@ class TestDataLoader(unittest.TestCase):
@patch.object(DataLoader, '_get_file_contents')
def test_parse_fail_from_file(self, mock_def):
- mock_def.return_value = ("""
+ mock_def.return_value = (b"""
TEXT:
***
NOT VALID