From bd5a11987fc9d9145a1414db635c803818ddd5c9 Mon Sep 17 00:00:00 2001
From: Claudiu Popa <pcmanticore@gmail.com>
Date: Fri, 16 Oct 2015 21:29:08 +0300
Subject: Decode the docstring before attempting to parse it with the spelling
 checker

Also, the html reporter now calls str() on the values in its list of messages
only for the objects which aren't already unicode or byte strings.
---
 pylint/checkers/spelling.py              | 6 ++++++
 pylint/reporters/html.py                 | 6 +++++-
 pylint/reporters/ureports/__init__.py    | 5 +----
 pylint/reporters/ureports/text_writer.py | 4 ----
 pylint/test/test_self.py                 | 5 +++++
 pylint/utils.py                          | 8 ++++----
 6 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/pylint/checkers/spelling.py b/pylint/checkers/spelling.py
index a5b7857..725f25a 100644
--- a/pylint/checkers/spelling.py
+++ b/pylint/checkers/spelling.py
@@ -21,6 +21,8 @@ import tokenize
 import string
 import re
 
+import six
+
 if sys.version_info[0] >= 3:
     maketrans = str.maketrans
 else:
@@ -244,6 +246,10 @@ class SpellingChecker(BaseTokenChecker):
             return
 
         start_line = node.lineno + 1
+        if six.PY2:
+            encoding = node.root().file_encoding
+            docstring = docstring.decode(encoding or sys.getdefaultencoding(),
+                                         'replace')
 
         # Go through lines of docstring
         for idx, line in enumerate(docstring.splitlines()):
diff --git a/pylint/reporters/html.py b/pylint/reporters/html.py
index b2214b1..b06ee16 100644
--- a/pylint/reporters/html.py
+++ b/pylint/reporters/html.py
@@ -17,6 +17,8 @@ import itertools
 import string
 import sys
 
+import six
+
 from pylint.interfaces import IReporter
 from pylint.reporters import BaseReporter
 from pylint.reporters.ureports.html_writer import HTMLWriter
@@ -67,7 +69,9 @@ class HTMLReporter(BaseReporter):
             self._parse_template()
 
         # We want to add the lines given by the template
-        self.msgs += [str(getattr(msg, field)) for field in self.msgargs]
+        values = [getattr(msg, field) for field in self.msgargs]
+        self.msgs += [value if isinstance(value, six.text_type) else str(value)
+                      for value in values]
 
     def set_output(self, output=None):
         """set output stream
diff --git a/pylint/reporters/ureports/__init__.py b/pylint/reporters/ureports/__init__.py
index 8a0a036..02322db 100644
--- a/pylint/reporters/ureports/__init__.py
+++ b/pylint/reporters/ureports/__init__.py
@@ -59,10 +59,7 @@ class BaseWriter(object):
 
     def write(self, string):
         """write a string in the output buffer"""
-        try:
-            self.out.write(string)
-        except UnicodeEncodeError:
-            self.out.write(string.encode(self.encoding))
+        self.out.write(string)
 
     def begin_format(self):
         """begin to format a layout"""
diff --git a/pylint/reporters/ureports/text_writer.py b/pylint/reporters/ureports/text_writer.py
index acf7b3b..6109b95 100644
--- a/pylint/reporters/ureports/text_writer.py
+++ b/pylint/reporters/ureports/text_writer.py
@@ -19,10 +19,6 @@
 
 from __future__ import print_function
 
-import os
-
-from six.moves import range
-
 from pylint.reporters.ureports import BaseWriter
 
 
diff --git a/pylint/test/test_self.py b/pylint/test/test_self.py
index ba4cdab..b430b12 100644
--- a/pylint/test/test_self.py
+++ b/pylint/test/test_self.py
@@ -259,6 +259,11 @@ class RunTC(unittest.TestCase):
         self._test_output([module, "--disable=all", "--enable=all", "-rn"],
                           expected_output=expected)
 
+    def test_html_crash_report(self):
+        out = six.StringIO()
+        module = join(HERE, 'regrtest_data', 'html_crash_420.py')
+        self._runtest([module], code=16, reporter=HTMLReporter(out))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/pylint/utils.py b/pylint/utils.py
index f303411..e34e8e8 100644
--- a/pylint/utils.py
+++ b/pylint/utils.py
@@ -142,9 +142,8 @@ def category_id(cid):
     return MSG_TYPES_LONG.get(cid)
 
 
-def _decoding_readline(stream, module):
-    return lambda: stream.readline().decode(module.file_encoding,
-                                            'replace')
+def _decoding_readline(stream, encoding):
+    return lambda: stream.readline().decode(encoding, 'replace')
 
 
 def tokenize_module(module):
@@ -152,7 +151,8 @@ def tokenize_module(module):
         readline = stream.readline
         if sys.version_info < (3, 0):
             if module.file_encoding is not None:
-                readline = _decoding_readline(stream, module)
+                readline = _decoding_readline(stream, module.file_encoding)
+
             return list(tokenize.generate_tokens(readline))
         return list(tokenize.tokenize(readline))
 
-- 
cgit v1.2.1