1 files changed, 29 insertions, 20 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index b150d64d53..c2065dafa2 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE
 ### Codec base classes (defining the API)
 
 class CodecInfo(tuple):
+    """Codec details when looking up the codec registry"""
+
+    # Private API to allow Python 3.4 to blacklist the known non-Unicode
+    # codecs in the standard library. A more general mechanism to
+    # reliably distinguish text encodings from other codecs will hopefully
+    # be defined for Python 3.5
+    #
+    # See http://bugs.python.org/issue19619
+    _is_text_encoding = True # Assume codecs are text encodings by default
 
     def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
-        incrementalencoder=None, incrementaldecoder=None, name=None):
+        incrementalencoder=None, incrementaldecoder=None, name=None,
+        *, _is_text_encoding=None):
         self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
         self.name = name
         self.encode = encode
@@ -84,6 +94,8 @@ class CodecInfo(tuple):
         self.incrementaldecoder = incrementaldecoder
         self.streamwriter = streamwriter
         self.streamreader = streamreader
+        if _is_text_encoding is not None:
+            self._is_text_encoding = _is_text_encoding
         return self
 
     def __repr__(self):
@@ -105,6 +117,7 @@ class Codec:
                     Python will use the official U+FFFD REPLACEMENT
                     CHARACTER for the builtin Unicode codecs on
                     decoding and '?' on encoding.
+         'surrogateescape' - replace with lone surrogate code points U+DCnn.
          'xmlcharrefreplace' - Replace with the appropriate XML
                                character reference (only for encoding).
          'backslashreplace'  - Replace with backslashed escape sequences
@@ -461,16 +474,13 @@ class StreamReader(Codec):
 
         # read until we get the required number of characters (if available)
         while True:
-            # can the request can be satisfied from the character buffer?
-            if chars < 0:
-                if size < 0:
-                    if self.charbuffer:
-                        break
-                elif len(self.charbuffer) >= size:
-                    break
-            else:
+            # can the request be satisfied from the character buffer?
+            if chars >= 0:
                 if len(self.charbuffer) >= chars:
                     break
+            elif size >= 0:
+                if len(self.charbuffer) >= size:
+                    break
             # we need more data
             if size < 0:
                 newdata = self.stream.read()
@@ -478,13 +488,15 @@ class StreamReader(Codec):
                 newdata = self.stream.read(size)
             # decode bytes (those remaining from the last call included)
             data = self.bytebuffer + newdata
+            if not data:
+                break
             try:
                 newchars, decodedbytes = self.decode(data, self.errors)
             except UnicodeDecodeError as exc:
                 if firstline:
                     newchars, decodedbytes = \
                         self.decode(data[:exc.start], self.errors)
-                    lines = newchars.splitlines(True)
+                    lines = newchars.splitlines(keepends=True)
                     if len(lines)<=1:
                         raise
                 else:
@@ -526,7 +538,7 @@ class StreamReader(Codec):
                 self.charbuffer = self.linebuffer[0]
                 self.linebuffer = None
             if not keepends:
-                line = line.splitlines(False)[0]
+                line = line.splitlines(keepends=False)[0]
             return line
 
         readsize = size or 72
@@ -543,7 +555,7 @@ class StreamReader(Codec):
                     data += self.read(size=1, chars=1)
 
             line += data
-            lines = line.splitlines(True)
+            lines = line.splitlines(keepends=True)
             if lines:
                 if len(lines) > 1:
                     # More than one line result; the first line is a full line
@@ -559,10 +571,10 @@ class StreamReader(Codec):
                         # only one remaining line, put it back into charbuffer
                         self.charbuffer = lines[0] + self.charbuffer
                     if not keepends:
-                        line = line.splitlines(False)[0]
+                        line = line.splitlines(keepends=False)[0]
                     break
                 line0withend = lines[0]
-                line0withoutend = lines[0].splitlines(False)[0]
+                line0withoutend = lines[0].splitlines(keepends=False)[0]
                 if line0withend != line0withoutend: # We really have a line end
                     # Put the rest back together and keep it until the next call
                     self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
@@ -575,7 +587,7 @@ class StreamReader(Codec):
             # we didn't get anything or this was our only try
             if not data or size is not None:
                 if line and not keepends:
-                    line = line.splitlines(False)[0]
+                    line = line.splitlines(keepends=False)[0]
                 break
             if readsize < 8000:
                 readsize *= 2
@@ -803,7 +815,7 @@ class StreamRecoder:
 
         data = self.reader.read()
         data, bytesencoded = self.encode(data, self.errors)
-        return data.splitlines(1)
+        return data.splitlines(keepends=True)
 
     def __next__(self):
 
@@ -1042,10 +1054,7 @@ def make_identity_dict(rng):
         mapped to themselves.
 
     """
-    res = {}
-    for i in rng:
-        res[i]=i
-    return res
+    return {i:i for i in rng}
 
 def make_encoding_map(decoding_map):