From bd5a11987fc9d9145a1414db635c803818ddd5c9 Mon Sep 17 00:00:00 2001 From: Claudiu Popa Date: Fri, 16 Oct 2015 21:29:08 +0300 Subject: Decode the docstring before attempting to parse it with the spelling checker. Also, when building the HTML reporter's list of messages, str() is now called only on values which aren't already unicode or byte strings. --- pylint/checkers/spelling.py | 6 ++++++ pylint/reporters/html.py | 6 +++++- pylint/reporters/ureports/__init__.py | 5 +---- pylint/reporters/ureports/text_writer.py | 4 ---- pylint/test/test_self.py | 5 +++++ pylint/utils.py | 8 ++++---- 6 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py index a5b7857..725f25a 100644 --- a/pylint/checkers/spelling.py +++ b/pylint/checkers/spelling.py @@ -21,6 +21,8 @@ import tokenize import string import re +import six + if sys.version_info[0] >= 3: maketrans = str.maketrans else: @@ -244,6 +246,10 @@ class SpellingChecker(BaseTokenChecker): return start_line = node.lineno + 1 + if six.PY2: + encoding = node.root().file_encoding + docstring = docstring.decode(encoding or sys.getdefaultencoding(), + 'replace') # Go through lines of docstring for idx, line in enumerate(docstring.splitlines()): diff --git a/pylint/reporters/html.py b/pylint/reporters/html.py index b2214b1..b06ee16 100644 --- a/pylint/reporters/html.py +++ b/pylint/reporters/html.py @@ -17,6 +17,8 @@ import itertools import string import sys +import six + from pylint.interfaces import IReporter from pylint.reporters import BaseReporter from pylint.reporters.ureports.html_writer import HTMLWriter @@ -67,7 +69,9 @@ class HTMLReporter(BaseReporter): self._parse_template() # We want to add the lines given by the template - self.msgs += [str(getattr(msg, field)) for field in self.msgargs] + values = [getattr(msg, field) for field in self.msgargs] + self.msgs += [value if isinstance(value, six.text_type) else str(value) + for value in values] def 
set_output(self, output=None): """set output stream diff --git a/pylint/reporters/ureports/__init__.py b/pylint/reporters/ureports/__init__.py index 8a0a036..02322db 100644 --- a/pylint/reporters/ureports/__init__.py +++ b/pylint/reporters/ureports/__init__.py @@ -59,10 +59,7 @@ class BaseWriter(object): def write(self, string): """write a string in the output buffer""" - try: - self.out.write(string) - except UnicodeEncodeError: - self.out.write(string.encode(self.encoding)) + self.out.write(string) def begin_format(self): """begin to format a layout""" diff --git a/pylint/reporters/ureports/text_writer.py b/pylint/reporters/ureports/text_writer.py index acf7b3b..6109b95 100644 --- a/pylint/reporters/ureports/text_writer.py +++ b/pylint/reporters/ureports/text_writer.py @@ -19,10 +19,6 @@ from __future__ import print_function -import os - -from six.moves import range - from pylint.reporters.ureports import BaseWriter diff --git a/pylint/test/test_self.py b/pylint/test/test_self.py index ba4cdab..b430b12 100644 --- a/pylint/test/test_self.py +++ b/pylint/test/test_self.py @@ -259,6 +259,11 @@ class RunTC(unittest.TestCase): self._test_output([module, "--disable=all", "--enable=all", "-rn"], expected_output=expected) + def test_html_crash_report(self): + out = six.StringIO() + module = join(HERE, 'regrtest_data', 'html_crash_420.py') + self._runtest([module], code=16, reporter=HTMLReporter(out)) + if __name__ == '__main__': unittest.main() diff --git a/pylint/utils.py b/pylint/utils.py index f303411..e34e8e8 100644 --- a/pylint/utils.py +++ b/pylint/utils.py @@ -142,9 +142,8 @@ def category_id(cid): return MSG_TYPES_LONG.get(cid) -def _decoding_readline(stream, module): - return lambda: stream.readline().decode(module.file_encoding, - 'replace') +def _decoding_readline(stream, encoding): + return lambda: stream.readline().decode(encoding, 'replace') def tokenize_module(module): @@ -152,7 +151,8 @@ def tokenize_module(module): readline = stream.readline if 
sys.version_info < (3, 0): if module.file_encoding is not None: - readline = _decoding_readline(stream, module) + readline = _decoding_readline(stream, module.file_encoding) + return list(tokenize.generate_tokens(readline)) return list(tokenize.tokenize(readline)) -- cgit v1.2.1