diff options
author | Georg Brandl <georg@python.org> | 2014-10-08 01:20:11 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-10-08 01:20:11 +0200 |
commit | 484583e428efde3dbea4980ffeafc53d4fe37935 (patch) | |
tree | 4102a9b4462a6069eb55bff5009a52e8e35f2314 /pygments/util.py | |
parent | c0ffb8a5babc8e6d1c58b92810f1cc11ae96ff85 (diff) | |
download | pygments-484583e428efde3dbea4980ffeafc53d4fe37935.tar.gz |
Overhaul encoding handling in cmdline even more.
Now the encoding guessed for the input file will be used for an output file.
We now always read and write to the terminal .buffer on Python 3, which allows
us to override the terminal encoding and use our guessing algorithm.
Diffstat (limited to 'pygments/util.py')
-rw-r--r-- | pygments/util.py | 35 |
1 files changed, 30 insertions, 5 deletions
diff --git a/pygments/util.py b/pygments/util.py
index abf1cab8..8376a67f 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -282,16 +282,41 @@ def guess_decode(text):
     """
     try:
         text = text.decode('utf-8')
+        return text, 'utf-8'
     except UnicodeDecodeError:
         try:
             import locale
-            text = text.decode(locale.getpreferredencoding())
+            prefencoding = locale.getpreferredencoding()
+            text = text.decode()
+            return text, prefencoding
         except (UnicodeDecodeError, LookupError):
             text = text.decode('latin1')
-    else:
-        if text.startswith(u'\ufeff'):
-            text = text[len(u'\ufeff'):]
-    return text
+            return text, 'latin1'
+
+
+def guess_decode_from_terminal(text, term):
+    """Decode *text* coming from terminal *term*.
+
+    First try the terminal encoding, if given.
+    Then try UTF-8. Then try the preferred locale encoding.
+    Fall back to latin-1, which always works.
+    """
+    if getattr(term, 'encoding', None):
+        try:
+            text = text.decode(term.encoding)
+        except UnicodeDecodeError:
+            pass
+        else:
+            return text, term.encoding
+    return guess_decode(text)
+
+
+def terminal_encoding(term):
+    """Return our best guess of encoding for the given *term*."""
+    if getattr(term, 'encoding', None):
+        return term.encoding
+    import locale
+    return locale.getpreferredencoding()
 
 
 # Python 2/3 compatibility