diff options
6 files changed, 24 insertions, 4 deletions
diff --git a/pylint/checkers/strings.py b/pylint/checkers/strings.py index d1f227133..92dcbbeec 100644 --- a/pylint/checkers/strings.py +++ b/pylint/checkers/strings.py @@ -592,12 +592,20 @@ class StringConstantChecker(BaseTokenChecker): self._unicode_literals = "unicode_literals" in module.future_imports def process_tokens(self, tokens): - for i, (tok_type, token, start, _, _) in enumerate(tokens): - if tok_type == tokenize.STRING: + encoding = "ascii" + for i, (tok_type, token, start, _, line) in enumerate(tokens): + if tok_type == tokenize.ENCODING: + # this is always the first token processed + encoding = token + elif tok_type == tokenize.STRING: # 'token' is the whole un-parsed token; we can look at the start # of it to see whether it's a raw or unicode string etc. self.process_string_token(token, start[0]) next_token = tokens[i + 1] if i + 1 < len(tokens) else None + if encoding != "ascii": + # We convert `tokenize` character count into a byte count, + # to match with astroid `.col_offset` + start = (start[0], len(line[: start[1]].encode(encoding))) self.string_tokens[start] = (str_eval(token), next_token) @check_messages(*(MSGS.keys())) @@ -618,6 +626,10 @@ class StringConstantChecker(BaseTokenChecker): if elt.col_offset < 0: # This can happen in case of escaped newlines continue + if (elt.lineno, elt.col_offset) not in self.string_tokens: + # This may happen with Latin1 encoding + # cf. 
https://github.com/PyCQA/pylint/issues/2610 + continue matching_token, next_token = self.string_tokens[ (elt.lineno, elt.col_offset) ] diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py new file mode 100644 index 000000000..44a2a94cb --- /dev/null +++ b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py @@ -0,0 +1,4 @@ +# coding: latin_1 +#pylint: disable=bad-continuation,invalid-name,missing-docstring + +TOTO = ('Café', 'Café', 'Café') diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py new file mode 100644 index 000000000..fa996e90c --- /dev/null +++ b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py @@ -0,0 +1,3 @@ +#pylint: disable=bad-continuation,invalid-name,missing-docstring + +TOTO = ('Café', 'Café', 'Café') diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt diff --git a/pylint/test/test_functional.py b/pylint/test/test_functional.py index c5d19bb09..576cd0d56 100644 --- a/pylint/test/test_functional.py +++ b/pylint/test/test_functional.py @@ -279,8 +279,9 @@ class LintModuleTest(object): def _open_source_file(self): if self._test_file.base == "invalid_encoded_data": return open(self._test_file.source) - else: - return io.open(self._test_file.source, encoding="utf8") + if "latin1" in self._test_file.base: + return io.open(self._test_file.source, 
encoding="latin1") + return io.open(self._test_file.source, encoding="utf8") def _get_expected(self): with self._open_source_file() as fobj: |