Decode the docstring before attempting to parse it with the spelling checker

Also, when building the HTML reporter's list of messages, the patch now calls str() only on objects which aren't already unicode or byte strings.
author: Claudiu Popa <pcmanticore@gmail.com> 2015-10-16 21:29:08 +0300
committer: Claudiu Popa <pcmanticore@gmail.com> 2015-10-16 21:29:08 +0300
commit: bd5a11987fc9d9145a1414db635c803818ddd5c9 (patch)
tree: 76f5ba8f01eb5038b9cf325b16ab192dc53eea73
parent: 258edc970bfe327ff5c37353a6b431d3f250fc35 (diff)
download: pylint-bd5a11987fc9d9145a1414db635c803818ddd5c9.tar.gz
6 files changed, 21 insertions, 13 deletions
diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py
index a5b7857..725f25a 100644
--- a/pylint/checkers/spelling.py
+++ b/pylint/checkers/spelling.py
@@ -21,6 +21,8 @@ import tokenize
 import string
 import re
 
+import six
+
 if sys.version_info[0] >= 3:
     maketrans = str.maketrans
 else:
@@ -244,6 +246,10 @@ class SpellingChecker(BaseTokenChecker):
             return
 
         start_line = node.lineno + 1
+        if six.PY2:
+            encoding = node.root().file_encoding
+            docstring = docstring.decode(encoding or sys.getdefaultencoding(),
+                                         'replace')
 
         # Go through lines of docstring
         for idx, line in enumerate(docstring.splitlines()):
diff --git a/pylint/reporters/html.py b/pylint/reporters/html.py
index b2214b1..b06ee16 100644
--- a/pylint/reporters/html.py
+++ b/pylint/reporters/html.py
@@ -17,6 +17,8 @@ import itertools
 import string
 import sys
 
+import six
+
 from pylint.interfaces import IReporter
 from pylint.reporters import BaseReporter
 from pylint.reporters.ureports.html_writer import HTMLWriter
@@ -67,7 +69,9 @@ class HTMLReporter(BaseReporter):
             self._parse_template()
 
         # We want to add the lines given by the template
-        self.msgs += [str(getattr(msg, field)) for field in self.msgargs]
+        values = [getattr(msg, field) for field in self.msgargs]
+        self.msgs += [value if isinstance(value, six.text_type) else str(value)
+                      for value in values]
 
     def set_output(self, output=None):
         """set output stream
diff --git a/pylint/reporters/ureports/__init__.py b/pylint/reporters/ureports/__init__.py
index 8a0a036..02322db 100644
--- a/pylint/reporters/ureports/__init__.py
+++ b/pylint/reporters/ureports/__init__.py
@@ -59,10 +59,7 @@ class BaseWriter(object):
 
     def write(self, string):
         """write a string in the output buffer"""
-        try:
-            self.out.write(string)
-        except UnicodeEncodeError:
-            self.out.write(string.encode(self.encoding))
+        self.out.write(string)
 
     def begin_format(self):
         """begin to format a layout"""
diff --git a/pylint/reporters/ureports/text_writer.py b/pylint/reporters/ureports/text_writer.py
index acf7b3b..6109b95 100644
--- a/pylint/reporters/ureports/text_writer.py
+++ b/pylint/reporters/ureports/text_writer.py
@@ -19,10 +19,6 @@
 
 from __future__ import print_function
 
-import os
-
-from six.moves import range
-
 from pylint.reporters.ureports import BaseWriter
 
 
diff --git a/pylint/test/test_self.py b/pylint/test/test_self.py
index ba4cdab..b430b12 100644
--- a/pylint/test/test_self.py
+++ b/pylint/test/test_self.py
@@ -259,6 +259,11 @@ class RunTC(unittest.TestCase):
         self._test_output([module, "--disable=all", "--enable=all", "-rn"],
                           expected_output=expected)
 
+    def test_html_crash_report(self):
+        out = six.StringIO()
+        module = join(HERE, 'regrtest_data', 'html_crash_420.py')
+        self._runtest([module], code=16, reporter=HTMLReporter(out))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/pylint/utils.py b/pylint/utils.py
index f303411..e34e8e8 100644
--- a/pylint/utils.py
+++ b/pylint/utils.py
@@ -142,9 +142,8 @@ def category_id(cid):
     return MSG_TYPES_LONG.get(cid)
 
 
-def _decoding_readline(stream, module):
-    return lambda: stream.readline().decode(module.file_encoding,
-                                            'replace')
+def _decoding_readline(stream, encoding):
+    return lambda: stream.readline().decode(encoding, 'replace')
 
 
 def tokenize_module(module):
@@ -152,7 +151,8 @@ def tokenize_module(module):
         readline = stream.readline
         if sys.version_info < (3, 0):
             if module.file_encoding is not None:
-                readline = _decoding_readline(stream, module)
+                readline = _decoding_readline(stream, module.file_encoding)
+
             return list(tokenize.generate_tokens(readline))
         return list(tokenize.tokenize(readline))
author	Claudiu Popa <pcmanticore@gmail.com>	2015-10-16 21:29:08 +0300
committer	Claudiu Popa <pcmanticore@gmail.com>	2015-10-16 21:29:08 +0300
commit	bd5a11987fc9d9145a1414db635c803818ddd5c9 (patch)
tree	76f5ba8f01eb5038b9cf325b16ab192dc53eea73
parent	258edc970bfe327ff5c37353a6b431d3f250fc35 (diff)
download	pylint-bd5a11987fc9d9145a1414db635c803818ddd5c9.tar.gz