From 691ce85b4acf55e699c2305e6302adc655d6fc70 Mon Sep 17 00:00:00 2001 From: jquast Date: Mon, 4 Nov 2013 20:32:20 -0800 Subject: encode tparm output as latin1 as explained in the comments above the decode().encode() wrapper, certain terminal kinds, such as 'avatar' or 'kermit' emit 8-bit bytes that are not legal UTF-8. This issue is resolved by encoding these values as latin1, which leaves their values unmolested. This is exemplified by a gist: https://gist.github.com/jquast/5649654 If you don't believe me, try kind='kermit' or kind='avatar', along with t.cup(n, n). Unfortunately, the state of the test cases in the master branch (issue #33) does not allow adding a test case in this branch until that one is pulled to master. Hell, I'll go ahead and make a third (and final) pull request for that one. --- blessings/__init__.py | 11 +++++++++-- blessings/tests.py | 4 ++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/blessings/__init__.py b/blessings/__init__.py index b135e01..d968bb5 100644 --- a/blessings/__init__.py +++ b/blessings/__init__.py @@ -367,7 +367,7 @@ class Terminal(object): # We can encode escape sequences as UTF-8 because they never # contain chars > 127, and UTF-8 never changes anything within that # range.. - return code.decode('utf-8') + return code.decode('latin1') return u'' def _resolve_color(self, color): @@ -436,7 +436,14 @@ class ParametrizingString(unicode): # Re-encode the cap, because tparm() takes a bytestring in Python # 3. However, appear to be a plain Unicode string otherwise so # concats work. - parametrized = tparm(self.encode('utf-8'), *args).decode('utf-8') + # We use *latin1* encoding so that bytes emitted by tparam are + # encoded to their native value: some terminal kinds, such as + # 'avatar' or 'kermit', emit 8-bit bytes in range 0x7f to 0xff. + # latin1 leaves these values unmodified in their conversion to + # unicode byte values. 
The terminal emulator will 'catch' and + # handle these values, even if emitting utf8 encoded text, where + # these bytes would otherwise be illegal utf8 start bytes. + parametrized = tparm(self.encode('latin1'), *args).decode('latin1') return (parametrized if self._normal is None else FormattingString(parametrized, self._normal)) except curses.error: diff --git a/blessings/tests.py b/blessings/tests.py index 7dda746..15141dc 100644 --- a/blessings/tests.py +++ b/blessings/tests.py @@ -28,12 +28,12 @@ TestTerminal = partial(Terminal, kind='xterm-256color') def unicode_cap(cap): """Return the result of ``tigetstr`` except as Unicode.""" - return tigetstr(cap).decode('utf-8') + return tigetstr(cap).decode('latin1') def unicode_parm(cap, *parms): """Return the result of ``tparm(tigetstr())`` except as Unicode.""" - return tparm(tigetstr(cap), *parms).decode('utf-8') + return tparm(tigetstr(cap), *parms).decode('latin1') def test_capability(): -- cgit v1.2.1 From dadefbbc4e862cd54c2fae72b29001bdd676aa73 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Tue, 5 Nov 2013 23:04:19 -0500 Subject: Tweak comments and update version history. --- README.rst | 2 ++ blessings/__init__.py | 11 +++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index cfa56c5..d11929e 100644 --- a/README.rst +++ b/README.rst @@ -439,6 +439,8 @@ Version History * Make ``is_a_tty`` a read-only property, like ``does_styling``. Writing to it never would have done anything constructive. * Add ``fullscreen()`` and ``hidden_cursor()`` to the auto-generated docs. + * Support terminal types, such as kermit and avatar, that use bytes 127-255 + in their escape sequences. (jquast) 1.5.1 * Clean up fabfile, removing the redundant ``test`` command. 
diff --git a/blessings/__init__.py b/blessings/__init__.py index d968bb5..a5013aa 100644 --- a/blessings/__init__.py +++ b/blessings/__init__.py @@ -364,9 +364,7 @@ class Terminal(object): """ code = tigetstr(self._sugar.get(atom, atom)) if code: - # We can encode escape sequences as UTF-8 because they never - # contain chars > 127, and UTF-8 never changes anything within that - # range.. + # See the comment in ParametrizingString for why this is latin1. return code.decode('latin1') return u'' @@ -436,12 +434,13 @@ class ParametrizingString(unicode): # Re-encode the cap, because tparm() takes a bytestring in Python # 3. However, appear to be a plain Unicode string otherwise so # concats work. - # We use *latin1* encoding so that bytes emitted by tparam are + # + # We use *latin1* encoding so that bytes emitted by tparm are # encoded to their native value: some terminal kinds, such as # 'avatar' or 'kermit', emit 8-bit bytes in range 0x7f to 0xff. # latin1 leaves these values unmodified in their conversion to - # unicode byte values. The terminal emulator will 'catch' and - # handle these values, even if emitting utf8 encoded text, where + # unicode byte values. The terminal emulator will "catch" and + # handle these values, even if emitting utf8-encoded text, where # these bytes would otherwise be illegal utf8 start bytes. parametrized = tparm(self.encode('latin1'), *args).decode('latin1') return (parametrized if self._normal is None else -- cgit v1.2.1