diff options
Diffstat (limited to 'Lib/codecs.py')
-rw-r--r-- | Lib/codecs.py | 49 |
1 files changed, 29 insertions, 20 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py index b150d64d53..c2065dafa2 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE ### Codec base classes (defining the API) class CodecInfo(tuple): + """Codec details when looking up the codec registry""" + + # Private API to allow Python 3.4 to blacklist the known non-Unicode + # codecs in the standard library. A more general mechanism to + # reliably distinguish text encodings from other codecs will hopefully + # be defined for Python 3.5 + # + # See http://bugs.python.org/issue19619 + _is_text_encoding = True # Assume codecs are text encodings by default def __new__(cls, encode, decode, streamreader=None, streamwriter=None, - incrementalencoder=None, incrementaldecoder=None, name=None): + incrementalencoder=None, incrementaldecoder=None, name=None, + *, _is_text_encoding=None): self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) self.name = name self.encode = encode @@ -84,6 +94,8 @@ class CodecInfo(tuple): self.incrementaldecoder = incrementaldecoder self.streamwriter = streamwriter self.streamreader = streamreader + if _is_text_encoding is not None: + self._is_text_encoding = _is_text_encoding return self def __repr__(self): @@ -105,6 +117,7 @@ class Codec: Python will use the official U+FFFD REPLACEMENT CHARACTER for the builtin Unicode codecs on decoding and '?' on encoding. + 'surrogateescape' - replace with private codepoints U+DCnn. 'xmlcharrefreplace' - Replace with the appropriate XML character reference (only for encoding). 'backslashreplace' - Replace with backslashed escape sequences @@ -461,16 +474,13 @@ class StreamReader(Codec): # read until we get the required number of characters (if available) while True: - # can the request can be satisfied from the character buffer? - if chars < 0: - if size < 0: - if self.charbuffer: - break - elif len(self.charbuffer) >= size: - break - else: + # can the request be satisfied from the character buffer? 
+ if chars >= 0: if len(self.charbuffer) >= chars: break + elif size >= 0: + if len(self.charbuffer) >= size: + break # we need more data if size < 0: newdata = self.stream.read() @@ -478,13 +488,15 @@ class StreamReader(Codec): newdata = self.stream.read(size) # decode bytes (those remaining from the last call included) data = self.bytebuffer + newdata + if not data: + break try: newchars, decodedbytes = self.decode(data, self.errors) except UnicodeDecodeError as exc: if firstline: newchars, decodedbytes = \ self.decode(data[:exc.start], self.errors) - lines = newchars.splitlines(True) + lines = newchars.splitlines(keepends=True) if len(lines)<=1: raise else: @@ -526,7 +538,7 @@ class StreamReader(Codec): self.charbuffer = self.linebuffer[0] self.linebuffer = None if not keepends: - line = line.splitlines(False)[0] + line = line.splitlines(keepends=False)[0] return line readsize = size or 72 @@ -543,7 +555,7 @@ class StreamReader(Codec): data += self.read(size=1, chars=1) line += data - lines = line.splitlines(True) + lines = line.splitlines(keepends=True) if lines: if len(lines) > 1: # More than one line result; the first line is a full line @@ -559,10 +571,10 @@ class StreamReader(Codec): # only one remaining line, put it back into charbuffer self.charbuffer = lines[0] + self.charbuffer if not keepends: - line = line.splitlines(False)[0] + line = line.splitlines(keepends=False)[0] break line0withend = lines[0] - line0withoutend = lines[0].splitlines(False)[0] + line0withoutend = lines[0].splitlines(keepends=False)[0] if line0withend != line0withoutend: # We really have a line end # Put the rest back together and keep it until the next call self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \ @@ -575,7 +587,7 @@ class StreamReader(Codec): # we didn't get anything or this was our only try if not data or size is not None: if line and not keepends: - line = line.splitlines(False)[0] + line = line.splitlines(keepends=False)[0] break if readsize < 8000: 
readsize *= 2 @@ -803,7 +815,7 @@ class StreamRecoder: data = self.reader.read() data, bytesencoded = self.encode(data, self.errors) - return data.splitlines(1) + return data.splitlines(keepends=True) def __next__(self): @@ -1042,10 +1054,7 @@ def make_identity_dict(rng): mapped to themselves. """ - res = {} - for i in rng: - res[i]=i - return res + return {i:i for i in rng} def make_encoding_map(decoding_map): |