From 26f5e4b32d9e7ecf35089aedd4abbb46fcf02bb3 Mon Sep 17 00:00:00 2001
From: Toshio Kuratomi <a.badger@gmail.com>
Date: Wed, 18 Jan 2017 21:27:13 -0800
Subject: Fix linenos being wrong in inventory errors

Fixes #20356

(cherry picked from commit 05879d331ae23b9c4a77b70425c3f8e48ab2e8ea)
---
 lib/ansible/inventory/ini.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py
index aa54c3d55a..09cc5623e5 100644
--- a/lib/ansible/inventory/ini.py
+++ b/lib/ansible/inventory/ini.py
@@ -38,6 +38,8 @@ class InventoryParser(object):
     Takes an INI-format inventory file and builds a list of groups and subgroups
     with their associated hosts and variable settings.
     """
+    _COMMENT_MARKERS = frozenset((u';', u'#'))
+    b_COMMENT_MARKERS = frozenset((b';', b'#'))
 
     def __init__(self, loader, groups, filename=C.DEFAULT_HOST_LIST):
         self.filename = filename
@@ -60,12 +62,19 @@ class InventoryParser(object):
         try:
             # Faster to do to_text once on a long string than many
             # times on smaller strings
-            data = to_text(b_data, errors='surrogate_or_strict')
-            data = [line for line in data.splitlines() if not (line.startswith(u';') or line.startswith(u'#'))]
+            data = to_text(b_data, errors='surrogate_or_strict').splitlines()
         except UnicodeError:
-            # Skip comment lines here to avoid potential undecodable
-            # errors in comments: https://github.com/ansible/ansible/issues/17593
-            data = [to_text(line, errors='surrogate_or_strict') for line in b_data.splitlines() if not (line.startswith(b';') or line.startswith(b'#'))]
+            # Handle non-utf8 in comment lines: https://github.com/ansible/ansible/issues/17593
+            data = []
+            for line in b_data.splitlines():
+                if line and line[0] in self.b_COMMENT_MARKERS:
+                    # Replace is okay for comment lines
+                    #data.append(to_text(line, errors='surrogate_or_replace'))
+                    # Currently we only need these lines for accurate lineno in errors
+                    data.append(u'')
+                else:
+                    # Non-comment lines still have to be valid uf-8
+                    data.append(to_text(line, errors='surrogate_or_strict'))
 
         self._parse(data)
 
@@ -95,8 +104,8 @@ class InventoryParser(object):
 
             line = line.strip()
 
-            # Skip empty lines
-            if not line:
+            # Skip empty lines and comments
+            if not line or line[0] in self._COMMENT_MARKERS:
                 continue
 
             # Is this a [section] header? That tells us what group we're parsing
-- 
cgit v1.2.1