summaryrefslogtreecommitdiff
path: root/pygments/util.py
diff options
context:
space:
mode:
author Georg Brandl <georg@python.org> 2014-10-08 01:20:11 +0200
committer Georg Brandl <georg@python.org> 2014-10-08 01:20:11 +0200
commit 484583e428efde3dbea4980ffeafc53d4fe37935 (patch)
tree 4102a9b4462a6069eb55bff5009a52e8e35f2314 /pygments/util.py
parent c0ffb8a5babc8e6d1c58b92810f1cc11ae96ff85 (diff)
download pygments-484583e428efde3dbea4980ffeafc53d4fe37935.tar.gz
Overhaul encoding handling in cmdline even more.
Now the encoding guessed for the input file will be used for an output file. We now always read and write to the terminal .buffer on Python 3, which allows us to override the terminal encoding and use our guessing algorithm.
Diffstat (limited to 'pygments/util.py')
-rw-r--r--pygments/util.py35
1 files changed, 30 insertions, 5 deletions
diff --git a/pygments/util.py b/pygments/util.py
index abf1cab8..8376a67f 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -282,16 +282,41 @@ def guess_decode(text):
"""
try:
text = text.decode('utf-8')
+ return text, 'utf-8'
except UnicodeDecodeError:
try:
import locale
- text = text.decode(locale.getpreferredencoding())
+ prefencoding = locale.getpreferredencoding()
+ text = text.decode()
+ return text, prefencoding
except (UnicodeDecodeError, LookupError):
text = text.decode('latin1')
- else:
- if text.startswith(u'\ufeff'):
- text = text[len(u'\ufeff'):]
- return text
+ return text, 'latin1'
+
+
def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    term_encoding = getattr(term, 'encoding', None)
    if term_encoding:
        try:
            decoded = text.decode(term_encoding)
        except (UnicodeDecodeError, LookupError):
            # Either the bytes are not valid in the advertised encoding, or
            # the terminal reports a codec name Python does not know.  In
            # both cases defer to the generic guessing below instead of
            # propagating the error to the caller.
            pass
        else:
            return decoded, term_encoding
    return guess_decode(text)
+
+
def terminal_encoding(term):
    """Return our best guess of encoding for the given *term*.

    Uses the terminal's own ``encoding`` attribute when it is present and
    non-empty; otherwise falls back to the preferred locale encoding.
    """
    encoding = getattr(term, 'encoding', None)
    if not encoding:
        # No usable encoding advertised by the terminal object.
        import locale
        encoding = locale.getpreferredencoding()
    return encoding
# Python 2/3 compatibility