path: root/testtools/compat.py
# Copyright (c) 2008-2015 testtools developers. See LICENSE for details.

"""Compatibility support for python 2 and 3."""

__metaclass__ = type
__all__ = [
    '_b',
    'advance_iterator',
    'reraise',
    'unicode_output_stream',
    'StringIO',
    'BytesIO',
    ]

import codecs
import io
import locale
import os
import sys
import unicodedata
# Ensure backwards compatibility with older testtools releases
from io import StringIO, BytesIO


def reraise(exc_class, exc_obj, exc_tb, _marker=object()):
    """Re-raise an exception received from sys.exc_info() or similar."""
    raise exc_obj.with_traceback(exc_tb)
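
# Illustrative sketch, not part of the original testtools API: ``reraise`` is
# normally handed the three values from ``sys.exc_info()`` so an exception
# captured in one place can be raised later with its original traceback.
def _example_reraise():
    # Hypothetical helper added for illustration only.
    try:
        {}["missing"]
    except KeyError:
        captured = sys.exc_info()
    # Raising again reproduces the original KeyError and its traceback.
    reraise(*captured)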


def _u(s):
    """A unicode literal."""
    return s


def _b(s):
    """A byte literal."""
    return s.encode("latin-1")


advance_iterator = next


def _slow_escape(text):
    """Escape unicode ``text`` leaving printable characters unmodified

    The behaviour emulates the Python 3 implementation of repr, see
    unicode_repr in unicodeobject.c and isprintable definition.

    Because this iterates over the input a codepoint at a time, it's slow, and
    does not handle astral characters correctly on Python builds with 16 bit
    rather than 32 bit unicode type.
    """
    output = []
    for c in text:
        o = ord(c)
        if o < 256:
            if o < 32 or 126 < o < 161:
                output.append(c.encode("unicode-escape"))
            elif o == 92:
                # Separate due to bug in unicode-escape codec in Python 2.4
                output.append("\\\\")
            else:
                output.append(c)
        else:
            # To get correct behaviour would need to pair up surrogates here
            if unicodedata.category(c)[0] in "CZ":
                output.append(c.encode("unicode-escape"))
            else:
                output.append(c)
    return "".join(output)


def text_repr(text, multiline=None):
    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``.
    """
    nl = b"\n" if isinstance(text, bytes) else "\n"
    if multiline is None:
        multiline = nl in text
    if not multiline:
        # Use normal repr for single-line input
        return repr(text)
    prefix = repr(text[:0])[:-2]
    if multiline:
        # To escape multiline strings, split and process each line in turn,
        # making sure that quotes are not escaped.
        offset = len(prefix) + 1
        lines = []
        for line in text.split(nl):
            r = repr(line)
            q = r[-1]
            lines.append(r[offset:-1].replace("\\" + q, q))
        # Combine the escaped lines and append two of the closing quotes,
        # then iterate over the result to escape triple quotes correctly.
        _semi_done = "\n".join(lines) + "''"
        p = 0
        while True:
            p = _semi_done.find("'''", p)
            if p == -1:
                break
            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
            p += 2
        return "".join([prefix, "'''\\\n", _semi_done, "'"])
    escaped_text = _slow_escape(text)
    # Determine which quote character to use and if one gets prefixed with a
    # backslash following the same logic Python uses for repr() on strings
    quote = "'"
    if "'" in text:
        if '"' in text:
            escaped_text = escaped_text.replace("'", "\\'")
        else:
            quote = '"'
    return "".join([prefix, quote, escaped_text, quote])


def unicode_output_stream(stream):
    """Get wrapper for given stream that writes any unicode without exception

    Characters that can't be coerced to the stream's encoding, or to 'ascii'
    if no valid encoding is found, are replaced. The original stream may be
    returned unchanged when no wrapper is needed.

    The wrapper only allows unicode to be written, not non-ascii bytestrings,
    which is a good thing to ensure sanity and sanitation.
    """
    if (sys.platform == "cli" or
        isinstance(stream, (io.TextIOWrapper, io.StringIO))):
        # Best to never encode before writing in IronPython, or if it is
        # already a TextIO (which in the io library has no encoding
        # attribute).
        return stream
    try:
        writer = codecs.getwriter(stream.encoding or "")
    except (AttributeError, LookupError):
        return codecs.getwriter("ascii")(stream, "replace")
    if writer.__module__.rsplit(".", 1)[1].startswith("utf"):
        # The current stream has a unicode encoding so no error handler is needed
        return stream
    # Python 3 doesn't seem to make this easy, handle a common case
    try:
        return stream.__class__(stream.buffer, stream.encoding, "replace",
            stream.newlines, stream.line_buffering)
    except AttributeError:
        pass
    return writer(stream, "replace")


def _get_exception_encoding():
    """Return the encoding we expect messages from the OS to be encoded in"""
    if os.name == "nt":
        # GZ 2010-05-24: Really want the codepage number instead, the error
        #                handling of standard codecs is more deterministic
        return "mbcs"
    # GZ 2010-05-23: We need this call to be after initialisation, but there's
    #                no benefit in asking more than once as it's a global
    #                setting that can change after the message is formatted.
    return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii"
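
# Illustration only, not part of the upstream module: the encoding returned
# above is meant for decoding byte messages that the operating system
# attaches to exceptions such as OSError.
def _example_get_exception_encoding():
    # Hypothetical helper added for illustration only.
    message = b"No such file or directory"
    decoded = message.decode(_get_exception_encoding(), "replace")
    assert decoded == "No such file or directory"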