summary | refs | log | tree | commit | diff
path: root/pylint/checkers/strings.py
diff options
context:
space:
mode:
Diffstat (limited to 'pylint/checkers/strings.py')
-rw-r--r-- pylint/checkers/strings.py 16
1 files changed, 14 insertions, 2 deletions
diff --git a/pylint/checkers/strings.py b/pylint/checkers/strings.py
index d1f227133..92dcbbeec 100644
--- a/pylint/checkers/strings.py
+++ b/pylint/checkers/strings.py
@@ -592,12 +592,20 @@ class StringConstantChecker(BaseTokenChecker):
self._unicode_literals = "unicode_literals" in module.future_imports
def process_tokens(self, tokens):
- for i, (tok_type, token, start, _, _) in enumerate(tokens):
- if tok_type == tokenize.STRING:
+ encoding = "ascii"
+ for i, (tok_type, token, start, _, line) in enumerate(tokens):
+ if tok_type == tokenize.ENCODING:
+ # this is always the first token processed
+ encoding = token
+ elif tok_type == tokenize.STRING:
# 'token' is the whole un-parsed token; we can look at the start
# of it to see whether it's a raw or unicode string etc.
self.process_string_token(token, start[0])
next_token = tokens[i + 1] if i + 1 < len(tokens) else None
+ if encoding != "ascii":
+ # We convert `tokenize` character count into a byte count,
+ # to match with astroid `.col_offset`
+ start = (start[0], len(line[: start[1]].encode(encoding)))
self.string_tokens[start] = (str_eval(token), next_token)
@check_messages(*(MSGS.keys()))
@@ -618,6 +626,10 @@ class StringConstantChecker(BaseTokenChecker):
if elt.col_offset < 0:
# This can happen in case of escaped newlines
continue
+ if (elt.lineno, elt.col_offset) not in self.string_tokens:
+ # This may happen with Latin1 encoding
+ # cf. https://github.com/PyCQA/pylint/issues/2610
+ continue
matching_token, next_token = self.string_tokens[
(elt.lineno, elt.col_offset)
]