Default lexer encoding is now "guess", i.e. UTF-8 / Locale / Latin1 are
tried in that order.
author: Georg Brandl <georg@python.org> 2014-11-06 13:18:19 +0100
committer: Georg Brandl <georg@python.org> 2014-11-06 13:18:19 +0100
commit: 69e83eb0856666d2594c96b1e8fae42dbeb92318 (patch)
tree: 818573e86d101542b1f7536c313443d5baf77dd5
parent: 9053d1b7a1c3ac2c90944fe9d9564e0351dac74f (diff)
download: pygments-69e83eb0856666d2594c96b1e8fae42dbeb92318.tar.gz
4 files changed, 10 insertions, 8 deletions
diff --git a/CHANGES b/CHANGES
index 95f975ac..e8c12970 100644
--- a/CHANGES
+++ b/CHANGES
@@ -74,6 +74,9 @@ Version 2.0rc1
   * Todo.txt todo lists
   * Twig (PR#404)
 
+- Default lexer encoding is now "guess", i.e. UTF-8 / Locale / Latin1 are
+  tried in that order.
+
 - Added a helper to "optimize" regular expressions that match one of many
   literal words; this can save 20% and more lexing time with lexers that
   highlight many keywords or builtins.
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index 4b22215f..7a4eeca8 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -417,7 +417,7 @@ class LatexFormatter(Formatter):
                 dict(docclass  = self.docclass,
                      preamble  = self.preamble,
                      title     = self.title,
-                     encoding  = self.encoding or 'latin1',
+                     encoding  = self.encoding or 'utf8',
                      styledefs = self.get_style_defs(),
                      code      = outfile.getvalue()))
 
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 687d19e3..e6c60015 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -71,10 +71,9 @@ class Lexer(object):
     ``encoding``
         If given, must be an encoding name. This encoding will be used to
         convert the input string to Unicode, if it is not already a Unicode
-        string (default: ``'latin1'``).
-        Can also be ``'guess'`` to use a simple UTF-8 / Locale / Latin1
-        detection, or ``'chardet'`` to use the chardet library, if it is
-        installed.
+        string (default: ``'guess'``, which uses a simple UTF-8 / Locale /
+        Latin1 detection).  Can also be ``'chardet'`` to use the chardet
+        library, if it is installed.
     ``inencoding``
         Overrides the ``encoding`` if given.
     """
@@ -103,7 +102,7 @@ class Lexer(object):
         self.stripall = get_bool_opt(options, 'stripall', False)
         self.ensurenl = get_bool_opt(options, 'ensurenl', True)
         self.tabsize = get_int_opt(options, 'tabsize', 0)
-        self.encoding = options.get('encoding', 'latin1')
+        self.encoding = options.get('encoding', 'guess')
         self.encoding = options.get('inencoding') or self.encoding
         self.filters = []
         for filter_ in get_list_opt(options, 'filters', ()):
diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py
index 578a9101..039090c5 100644
--- a/pygments/lexers/__init__.py
+++ b/pygments/lexers/__init__.py
@@ -18,7 +18,7 @@ from os.path import basename
 from pygments.lexers._mapping import LEXERS
 from pygments.modeline import get_filetype_from_buffer
 from pygments.plugin import find_plugin_lexers
-from pygments.util import ClassNotFound, itervalues
+from pygments.util import ClassNotFound, itervalues, guess_decode
 
 
 __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
@@ -116,7 +116,7 @@ def find_lexer_class_for_filename(_fn, code=None):
 
     if sys.version_info > (3,) and isinstance(code, bytes):
         # decode it, since all analyse_text functions expect unicode
-        code = code.decode('latin1')
+        code = guess_decode(code)
 
     def get_rating(info):
         cls, filename = info
author	Georg Brandl <georg@python.org>	2014-11-06 13:18:19 +0100
committer	Georg Brandl <georg@python.org>	2014-11-06 13:18:19 +0100
commit	69e83eb0856666d2594c96b1e8fae42dbeb92318 (patch)
tree	818573e86d101542b1f7536c313443d5baf77dd5
parent	9053d1b7a1c3ac2c90944fe9d9564e0351dac74f (diff)
download	pygments-69e83eb0856666d2594c96b1e8fae42dbeb92318.tar.gz