summaryrefslogtreecommitdiff
path: root/Lib/codecs.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/codecs.py')
-rw-r--r--Lib/codecs.py49
1 files changed, 29 insertions, 20 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index b150d64d53..c2065dafa2 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE
### Codec base classes (defining the API)
class CodecInfo(tuple):
+ """Codec details when looking up the codec registry"""
+
+ # Private API to allow Python 3.4 to blacklist the known non-Unicode
+ # codecs in the standard library. A more general mechanism to
+ # reliably distinguish test encodings from other codecs will hopefully
+ # be defined for Python 3.5
+ #
+ # See http://bugs.python.org/issue19619
+ _is_text_encoding = True # Assume codecs are text encodings by default
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
- incrementalencoder=None, incrementaldecoder=None, name=None):
+ incrementalencoder=None, incrementaldecoder=None, name=None,
+ *, _is_text_encoding=None):
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
self.name = name
self.encode = encode
@@ -84,6 +94,8 @@ class CodecInfo(tuple):
self.incrementaldecoder = incrementaldecoder
self.streamwriter = streamwriter
self.streamreader = streamreader
+ if _is_text_encoding is not None:
+ self._is_text_encoding = _is_text_encoding
return self
def __repr__(self):
@@ -105,6 +117,7 @@ class Codec:
Python will use the official U+FFFD REPLACEMENT
CHARACTER for the builtin Unicode codecs on
decoding and '?' on encoding.
+ 'surrogateescape' - replace with private codepoints U+DCnn.
'xmlcharrefreplace' - Replace with the appropriate XML
character reference (only for encoding).
'backslashreplace' - Replace with backslashed escape sequences
@@ -461,16 +474,13 @@ class StreamReader(Codec):
# read until we get the required number of characters (if available)
while True:
- # can the request can be satisfied from the character buffer?
- if chars < 0:
- if size < 0:
- if self.charbuffer:
- break
- elif len(self.charbuffer) >= size:
- break
- else:
+ # can the request be satisfied from the character buffer?
+ if chars >= 0:
if len(self.charbuffer) >= chars:
break
+ elif size >= 0:
+ if len(self.charbuffer) >= size:
+ break
# we need more data
if size < 0:
newdata = self.stream.read()
@@ -478,13 +488,15 @@ class StreamReader(Codec):
newdata = self.stream.read(size)
# decode bytes (those remaining from the last call included)
data = self.bytebuffer + newdata
+ if not data:
+ break
try:
newchars, decodedbytes = self.decode(data, self.errors)
except UnicodeDecodeError as exc:
if firstline:
newchars, decodedbytes = \
self.decode(data[:exc.start], self.errors)
- lines = newchars.splitlines(True)
+ lines = newchars.splitlines(keepends=True)
if len(lines)<=1:
raise
else:
@@ -526,7 +538,7 @@ class StreamReader(Codec):
self.charbuffer = self.linebuffer[0]
self.linebuffer = None
if not keepends:
- line = line.splitlines(False)[0]
+ line = line.splitlines(keepends=False)[0]
return line
readsize = size or 72
@@ -543,7 +555,7 @@ class StreamReader(Codec):
data += self.read(size=1, chars=1)
line += data
- lines = line.splitlines(True)
+ lines = line.splitlines(keepends=True)
if lines:
if len(lines) > 1:
# More than one line result; the first line is a full line
@@ -559,10 +571,10 @@ class StreamReader(Codec):
# only one remaining line, put it back into charbuffer
self.charbuffer = lines[0] + self.charbuffer
if not keepends:
- line = line.splitlines(False)[0]
+ line = line.splitlines(keepends=False)[0]
break
line0withend = lines[0]
- line0withoutend = lines[0].splitlines(False)[0]
+ line0withoutend = lines[0].splitlines(keepends=False)[0]
if line0withend != line0withoutend: # We really have a line end
# Put the rest back together and keep it until the next call
self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
@@ -575,7 +587,7 @@ class StreamReader(Codec):
# we didn't get anything or this was our only try
if not data or size is not None:
if line and not keepends:
- line = line.splitlines(False)[0]
+ line = line.splitlines(keepends=False)[0]
break
if readsize < 8000:
readsize *= 2
@@ -803,7 +815,7 @@ class StreamRecoder:
data = self.reader.read()
data, bytesencoded = self.encode(data, self.errors)
- return data.splitlines(1)
+ return data.splitlines(keepends=True)
def __next__(self):
@@ -1042,10 +1054,7 @@ def make_identity_dict(rng):
mapped to themselves.
"""
- res = {}
- for i in rng:
- res[i]=i
- return res
+ return {i:i for i in rng}
def make_encoding_map(decoding_map):