Merged revisions 59041-59055 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk ........ r59044 | neal.norwitz | 2007-11-18 17:46:20 -0800 (Sun, 18 Nov 2007) | 1 line Use a slightly more recent version than 1.5.2b2. ........ r59047 | walter.doerwald | 2007-11-19 04:14:05 -0800 (Mon, 19 Nov 2007) | 2 lines Fix typo in comment. ........ r59049 | walter.doerwald | 2007-11-19 04:41:10 -0800 (Mon, 19 Nov 2007) | 4 lines Fix for #1444: utf_8_sig.StreamReader was (indirectly through decode()) calling codecs.utf_8_decode() with final==True, which failed with incomplete byte sequences. Fix and test by James G. Sack. ........ r59051 | nick.coghlan | 2007-11-19 05:56:27 -0800 (Mon, 19 Nov 2007) | 1 line Enable some test_cmd_line_script debugging output to investigate failure on Mac OSX buildbot ........ r59053 | facundo.batista | 2007-11-19 08:30:24 -0800 (Mon, 19 Nov 2007) | 3 lines Fixed detail in add_type() explanation (issue 1463). ........ r59054 | guido.van.rossum | 2007-11-19 09:35:24 -0800 (Mon, 19 Nov 2007) | 2 lines Make this work stand-alone, too. ........ r59055 | guido.van.rossum | 2007-11-19 09:50:22 -0800 (Mon, 19 Nov 2007) | 3 lines Fix the OSX failures in this test -- they were due to /tmp being a symlink to /private/tmp. Adding a call to os.path.realpath() to temp_dir() fixed it. ........
author: Guido van Rossum <guido@python.org> 2007-11-19 18:03:44 +0000
committer: Guido van Rossum <guido@python.org> 2007-11-19 18:03:44 +0000
commit: 5d0a4df5b8c382eabff29eb748c8641dbec91850 (patch)
tree: 7874ff97b5c846b25a945247145c7e2e8b673989
parent: 75cce6ddba52166b154f1dca959f10436d999dbf (diff)
download: cpython-5d0a4df5b8c382eabff29eb748c8641dbec91850.tar.gz
4 files changed, 66 insertions, 16 deletions
diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst
index 50140c6dae..f59cd829a9 100644
--- a/Doc/library/mimetypes.rst
+++ b/Doc/library/mimetypes.rst
@@ -96,8 +96,8 @@ behavior of the module.
    extension is already known, the new type will replace the old one. When the type
    is already known the extension will be added to the list of known extensions.
 
-   When *strict* is the mapping will added to the official MIME types, otherwise to
-   the non-standard ones.
+   When *strict* is True (the default), the mapping will added to the official MIME
+   types, otherwise to the non-standard ones.
 
 
 .. data:: inited
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index 07cd5eeb1b..1bb479203f 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -103,12 +103,18 @@ class StreamReader(codecs.StreamReader):
             pass
 
     def decode(self, input, errors='strict'):
-        if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
-            # not enough data to decide if this is a BOM
-            # => try again on the next call
-            return ("", 0)
+        if len(input) < 3:
+            if codecs.BOM_UTF8.startswith(input):
+                # not enough data to decide if this is a BOM
+                # => try again on the next call
+                return ("", 0)
+        elif input[:3] == codecs.BOM_UTF8:
+            self.decode = codecs.utf_8_decode
+            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
+            return (output, consumed+3)
+        # (else) no BOM present
         self.decode = codecs.utf_8_decode
-        return decode(input, errors)
+        return codecs.utf_8_decode(input, errors)
 
 ### encodings module API
 
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 3c6e4a0af5..dcb768f3c0 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -29,6 +29,7 @@ def _run_python(*args):
 @contextlib.contextmanager
 def temp_dir():
     dirname = tempfile.mkdtemp()
+    dirname = os.path.realpath(dirname)
     try:
         yield dirname
     finally:
@@ -82,7 +83,7 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
     zip_file.close()
     # if verbose:
     #    zip_file = zipfile.ZipFile(zip_name, 'r')
-    #    print "Contents of %r:" % zip_name
+    #    print("Contents of %r:" % zip_name)
     #    zip_file.printdir()
     #    zip_file.close()
     return zip_name
@@ -90,9 +91,9 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
 class CmdLineTest(unittest.TestCase):
     def _check_script(self, script_name, expected_file, expected_argv0):
         exit_code, data = _run_python(script_name)
-        # if verbose:
-        #    print "Output from test script %r:" % script_name
-        #    print data
+        if verbose:
+            print("Output from test script %r:" % script_name)
+            print(data)
         self.assertEqual(exit_code, 0, data)
         printed_file = '__file__==%r' % expected_file
         printed_argv0 = 'sys.argv[0]==%r' % expected_argv0
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 5833c6db18..413a5aa8d2 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -59,7 +59,7 @@ class MixInCheckStateHandling:
 class ReadTest(unittest.TestCase, MixInCheckStateHandling):
     def check_partial(self, input, partialresults):
         # get a StreamReader for the encoding and feed the bytestring version
-        # of input to the reader byte by byte. Read every available from
+        # of input to the reader byte by byte. Read everything available from
         # the StreamReader and check that the results equal the appropriate
         # entries from partialresults.
         q = Queue(b"")
@@ -618,10 +618,53 @@ class UTF8SigTest(ReadTest):
         s = "spam"
         self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
 
-    def test_decoder_state(self):
-        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
-        self.check_state_handling_decode(self.encoding,
-                                         u, u.encode(self.encoding))
+    def test_stream_bom(self):
+        unistring = "ABC\u00A1\u2200XYZ"
+        bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
+
+        reader = codecs.getreader("utf-8-sig")
+        for sizehint in [None] + list(range(1, 11)) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = reader(io.BytesIO(bytestring))
+            ostream = io.StringIO()
+            while 1:
+                if sizehint is not None:
+                    data = istream.read(sizehint)
+                else:
+                    data = istream.read()
+
+                if not data:
+                    break
+                ostream.write(data)
+
+            got = ostream.getvalue()
+            self.assertEqual(got, unistring)
+
+    def test_stream_bare(self):
+        unistring = "ABC\u00A1\u2200XYZ"
+        bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ"
+
+        reader = codecs.getreader("utf-8-sig")
+        for sizehint in [None] + list(range(1, 11)) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = reader(io.BytesIO(bytestring))
+            ostream = io.StringIO()
+            while 1:
+                if sizehint is not None:
+                    data = istream.read(sizehint)
+                else:
+                    data = istream.read()
+
+                if not data:
+                    break
+                ostream.write(data)
+
+            got = ostream.getvalue()
+            self.assertEqual(got, unistring)
+
+class EscapeDecodeTest(unittest.TestCase):
+    def test_empty(self):
+        self.assertEquals(codecs.escape_decode(""), ("", 0))
 
 class RecodingTest(unittest.TestCase):
     def test_recoding(self):
author	Guido van Rossum <guido@python.org>	2007-11-19 18:03:44 +0000
committer	Guido van Rossum <guido@python.org>	2007-11-19 18:03:44 +0000
commit	5d0a4df5b8c382eabff29eb748c8641dbec91850 (patch)
tree	7874ff97b5c846b25a945247145c7e2e8b673989
parent	75cce6ddba52166b154f1dca959f10436d999dbf (diff)
download	cpython-5d0a4df5b8c382eabff29eb748c8641dbec91850.tar.gz