summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dani Alcala <112832187+clavedeluna@users.noreply.github.com> 2022-10-23 14:16:34 -0300
committer: Pierre Sassoulas <pierre.sassoulas@gmail.com> 2022-11-17 14:03:15 +0100
commit: 288a88d8c6292bf419b0bb46cad4f013771618c8 (patch)
tree: 174c217df3eb56140692aef3d1d2ab728d6a7ada
parent: 970e14b211dfe65f205e3ba7c17f052091f03cd9 (diff)
download: pylint-git-288a88d8c6292bf419b0bb46cad4f013771618c8.tar.gz
Fix astroid-error for parsing module encoding (#7663)
Co-authored-by: Pierre Sassoulas <pierre.sassoulas@gmail.com>
-rw-r--r-- doc/whatsnew/fragments/7661.bugfix | 3
-rw-r--r-- pylint/checkers/unicode.py | 2
-rw-r--r-- tests/regrtest_data/encoding/bad_missing_num.py | 1
-rw-r--r-- tests/regrtest_data/encoding/bad_wrong_num.py | 1
-rw-r--r-- tests/regrtest_data/encoding/good.py | 1
-rw-r--r-- tests/test_self.py | 21
6 files changed, 27 insertions, 2 deletions
diff --git a/doc/whatsnew/fragments/7661.bugfix b/doc/whatsnew/fragments/7661.bugfix
new file mode 100644
index 000000000..2e58c861b
--- /dev/null
+++ b/doc/whatsnew/fragments/7661.bugfix
@@ -0,0 +1,3 @@
+Fix crash that happened when parsing files with unexpected encoding starting with 'utf' like ``utf13``.
+
+Closes #7661
diff --git a/pylint/checkers/unicode.py b/pylint/checkers/unicode.py
index b5123ef17..35a0cd7fc 100644
--- a/pylint/checkers/unicode.py
+++ b/pylint/checkers/unicode.py
@@ -218,7 +218,7 @@ def _normalize_codec_name(codec: str) -> str:
def _remove_bom(encoded: bytes, encoding: str) -> bytes:
"""Remove the bom if given from a line."""
- if not encoding.startswith("utf"):
+ if encoding not in UNICODE_BOMS:
return encoded
bom = UNICODE_BOMS[encoding]
if encoded.startswith(bom):
diff --git a/tests/regrtest_data/encoding/bad_missing_num.py b/tests/regrtest_data/encoding/bad_missing_num.py
new file mode 100644
index 000000000..a43139838
--- /dev/null
+++ b/tests/regrtest_data/encoding/bad_missing_num.py
@@ -0,0 +1 @@
+# -*- encoding: utf -*-
diff --git a/tests/regrtest_data/encoding/bad_wrong_num.py b/tests/regrtest_data/encoding/bad_wrong_num.py
new file mode 100644
index 000000000..5c6bfe786
--- /dev/null
+++ b/tests/regrtest_data/encoding/bad_wrong_num.py
@@ -0,0 +1 @@
+# -*- encoding: utf-9 -*-
diff --git a/tests/regrtest_data/encoding/good.py b/tests/regrtest_data/encoding/good.py
new file mode 100644
index 000000000..dae354a67
--- /dev/null
+++ b/tests/regrtest_data/encoding/good.py
@@ -0,0 +1 @@
+# -*- encoding: utf-8 -*-
diff --git a/tests/test_self.py b/tests/test_self.py
index a69b45aaf..010e60682 100644
--- a/tests/test_self.py
+++ b/tests/test_self.py
@@ -148,7 +148,9 @@ class TestRunTC:
output = re.sub(CLEAN_PATH, "", output, flags=re.MULTILINE)
return output.replace("\\", "/")
- def _test_output(self, args: list[str], expected_output: str) -> None:
+ def _test_output(
+ self, args: list[str], expected_output: str, unexpected_output: str = ""
+ ) -> None:
out = StringIO()
args = _add_rcfile_default_pylintrc(args)
self._run_pylint(args, out=out)
@@ -156,6 +158,9 @@ class TestRunTC:
expected_output = self._clean_paths(expected_output)
assert expected_output.strip() in actual_output.strip()
+ if unexpected_output:
+ assert unexpected_output.strip() not in actual_output.strip()
+
def _test_output_file(
self, args: list[str], filename: LocalPath, expected_output: str
) -> None:
@@ -1196,6 +1201,20 @@ a.py:1:4: E0001: Parsing failed: 'invalid syntax (<unknown>, line 1)' (syntax-er
expected_output = "unknown encoding"
self._test_output([module, "-E"], expected_output=expected_output)
+ @pytest.mark.parametrize(
+ "module_name,expected_output",
+ [
+ ("good.py", ""),
+ ("bad_wrong_num.py", "(syntax-error)"),
+ ("bad_missing_num.py", "(bad-file-encoding)"),
+ ],
+ )
+ def test_encoding(self, module_name: str, expected_output: str) -> None:
+ path = join(HERE, "regrtest_data", "encoding", module_name)
+ self._test_output(
+ [path], expected_output=expected_output, unexpected_output="(astroid-error)"
+ )
+
class TestCallbackOptions:
"""Test for all callback options we support."""