summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lucas Cimon <lucas.cimon@gmail.com> 2018-11-26 14:22:04 +0100
committer: Claudiu Popa <pcmanticore@gmail.com> 2018-11-26 14:22:04 +0100
commit: f4ebdce59f7043eb612d3363244f3c3cb7b7b8ce (patch)
tree: c6284f3d79aa3f0e98d4200e801acf9f88f4cb8c
parent: 1ac83855b38a8d053ec5d0403a5c0e6e8b191fbb (diff)
download: pylint-git-f4ebdce59f7043eb612d3363244f3c3cb7b7b8ce.tar.gz
implicit-str-concat-in-sequence: Handling lines with multi-bytes characters - fix #2610 (#2611)
-rw-r--r-- pylint/checkers/strings.py 16
-rw-r--r-- pylint/test/functional/implicit_str_concat_in_sequence_latin1.py 4
-rw-r--r-- pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt 0
-rw-r--r-- pylint/test/functional/implicit_str_concat_in_sequence_utf8.py 3
-rw-r--r-- pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt 0
-rw-r--r-- pylint/test/test_functional.py 5
6 files changed, 24 insertions, 4 deletions
diff --git a/pylint/checkers/strings.py b/pylint/checkers/strings.py
index d1f227133..92dcbbeec 100644
--- a/pylint/checkers/strings.py
+++ b/pylint/checkers/strings.py
@@ -592,12 +592,20 @@ class StringConstantChecker(BaseTokenChecker):
self._unicode_literals = "unicode_literals" in module.future_imports
def process_tokens(self, tokens):
- for i, (tok_type, token, start, _, _) in enumerate(tokens):
- if tok_type == tokenize.STRING:
+ encoding = "ascii"
+ for i, (tok_type, token, start, _, line) in enumerate(tokens):
+ if tok_type == tokenize.ENCODING:
+ # this is always the first token processed
+ encoding = token
+ elif tok_type == tokenize.STRING:
# 'token' is the whole un-parsed token; we can look at the start
# of it to see whether it's a raw or unicode string etc.
self.process_string_token(token, start[0])
next_token = tokens[i + 1] if i + 1 < len(tokens) else None
+ if encoding != "ascii":
+ # We convert `tokenize` character count into a byte count,
+ # to match with astroid `.col_offset`
+ start = (start[0], len(line[: start[1]].encode(encoding)))
self.string_tokens[start] = (str_eval(token), next_token)
@check_messages(*(MSGS.keys()))
@@ -618,6 +626,10 @@ class StringConstantChecker(BaseTokenChecker):
if elt.col_offset < 0:
# This can happen in case of escaped newlines
continue
+ if (elt.lineno, elt.col_offset) not in self.string_tokens:
+ # This may happen with Latin1 encoding
+ # cf. https://github.com/PyCQA/pylint/issues/2610
+ continue
matching_token, next_token = self.string_tokens[
(elt.lineno, elt.col_offset)
]
diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py
new file mode 100644
index 000000000..44a2a94cb
--- /dev/null
+++ b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.py
@@ -0,0 +1,4 @@
+# coding: latin_1
+#pylint: disable=bad-continuation,invalid-name,missing-docstring
+
+TOTO = ('Café', 'Café', 'Café')
diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/pylint/test/functional/implicit_str_concat_in_sequence_latin1.txt
diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py
new file mode 100644
index 000000000..fa996e90c
--- /dev/null
+++ b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.py
@@ -0,0 +1,3 @@
+#pylint: disable=bad-continuation,invalid-name,missing-docstring
+
+TOTO = ('Café', 'Café', 'Café')
diff --git a/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/pylint/test/functional/implicit_str_concat_in_sequence_utf8.txt
diff --git a/pylint/test/test_functional.py b/pylint/test/test_functional.py
index c5d19bb09..576cd0d56 100644
--- a/pylint/test/test_functional.py
+++ b/pylint/test/test_functional.py
@@ -279,8 +279,9 @@ class LintModuleTest(object):
def _open_source_file(self):
if self._test_file.base == "invalid_encoded_data":
return open(self._test_file.source)
- else:
- return io.open(self._test_file.source, encoding="utf8")
+ if "latin1" in self._test_file.base:
+ return io.open(self._test_file.source, encoding="latin1")
+ return io.open(self._test_file.source, encoding="utf8")
def _get_expected(self):
with self._open_source_file() as fobj: