diff options
author | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2009-08-29 20:59:56 +0000 |
---|---|---|
committer | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2009-08-29 20:59:56 +0000 |
commit | 884379d8e880f6a7cfbd76d695d697195a9ba7e7 (patch) | |
tree | 9a389adbe8bc8db970f05663dbc120f1d2cfb83e | |
parent | e0520e571eba024e4f41fb6511fa9dd1b7afdef4 (diff) | |
download | pyyaml-884379d8e880f6a7cfbd76d695d697195a9ba7e7.tar.gz |
Fixed emitting of invalid BOM for UTF-16.
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@351 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r-- | lib/yaml/emitter.py | 2 | ||||
-rw-r--r-- | lib3/yaml/emitter.py | 2 | ||||
-rw-r--r-- | tests/data/utf16be.code | 1 | ||||
-rw-r--r-- | tests/data/utf16be.data | bin | 0 -> 30 bytes | |||
-rw-r--r-- | tests/data/utf16le.code | 1 | ||||
-rw-r--r-- | tests/data/utf16le.data | bin | 0 -> 30 bytes | |||
-rw-r--r-- | tests/data/utf8-implicit.code | 1 | ||||
-rw-r--r-- | tests/data/utf8-implicit.data | 1 | ||||
-rw-r--r-- | tests/data/utf8.code | 1 | ||||
-rw-r--r-- | tests/data/utf8.data | 1 | ||||
-rw-r--r-- | tests/lib/test_representer.py | 53 | ||||
-rw-r--r-- | tests/lib3/test_representer.py | 53 |
12 files changed, 62 insertions, 54 deletions
diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py
index f763b90..4cb2c8a 100644
--- a/lib/yaml/emitter.py
+++ b/lib/yaml/emitter.py
@@ -789,7 +789,7 @@ class Emitter(object):
     def write_stream_start(self):
         # Write BOM if needed.
         if self.encoding and self.encoding.startswith('utf-16'):
-            self.stream.write(u'\xFF\xFE'.encode(self.encoding))
+            self.stream.write(u'\uFEFF'.encode(self.encoding))
 
     def write_stream_end(self):
         self.flush_stream()
diff --git a/lib3/yaml/emitter.py b/lib3/yaml/emitter.py
index 5815909..fd55946 100644
--- a/lib3/yaml/emitter.py
+++ b/lib3/yaml/emitter.py
@@ -785,7 +785,7 @@ class Emitter:
     def write_stream_start(self):
         # Write BOM if needed.
         if self.encoding and self.encoding.startswith('utf-16'):
-            self.stream.write('\xFF\xFE'.encode(self.encoding))
+            self.stream.write('\uFEFF'.encode(self.encoding))
 
     def write_stream_end(self):
         self.flush_stream()
diff --git a/tests/data/utf16be.code b/tests/data/utf16be.code
new file mode 100644
index 0000000..c45b371
--- /dev/null
+++ b/tests/data/utf16be.code
@@ -0,0 +1 @@
+"UTF-16-BE"
diff --git a/tests/data/utf16be.data b/tests/data/utf16be.data
Binary files differ
new file mode 100644
index 0000000..50dcfae
--- /dev/null
+++ b/tests/data/utf16be.data
diff --git a/tests/data/utf16le.code b/tests/data/utf16le.code
new file mode 100644
index 0000000..400530a
--- /dev/null
+++ b/tests/data/utf16le.code
@@ -0,0 +1 @@
+"UTF-16-LE"
diff --git a/tests/data/utf16le.data b/tests/data/utf16le.data
Binary files differ
new file mode 100644
index 0000000..76f5e73
--- /dev/null
+++ b/tests/data/utf16le.data
diff --git a/tests/data/utf8-implicit.code b/tests/data/utf8-implicit.code
new file mode 100644
index 0000000..29326db
--- /dev/null
+++ b/tests/data/utf8-implicit.code
@@ -0,0 +1 @@
+"implicit UTF-8"
diff --git a/tests/data/utf8-implicit.data b/tests/data/utf8-implicit.data
new file mode 100644
index 0000000..9d8081e
--- /dev/null
+++ b/tests/data/utf8-implicit.data
@@ -0,0 +1 @@
+--- implicit UTF-8
diff --git a/tests/data/utf8.code b/tests/data/utf8.code
new file mode 100644
index 0000000..dcf11cc
--- /dev/null
+++ b/tests/data/utf8.code
@@ -0,0 +1 @@
+"UTF-8"
diff --git a/tests/data/utf8.data b/tests/data/utf8.data
new file mode 100644
index 0000000..686f48a
--- /dev/null
+++ b/tests/data/utf8.data
@@ -0,0 +1 @@
+--- UTF-8
diff --git a/tests/lib/test_representer.py b/tests/lib/test_representer.py
index f814705..a82a32a 100644
--- a/tests/lib/test_representer.py
+++ b/tests/lib/test_representer.py
@@ -6,33 +6,34 @@ import pprint
 def test_representer_types(code_filename, verbose=False):
     test_constructor._make_objects()
     for allow_unicode in [False, True]:
-        native1 = test_constructor._load_code(open(code_filename, 'rb').read())
-        native2 = None
-        try:
-            output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
-                        allow_unicode=allow_unicode)
-            native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+        for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']:
+            native1 = test_constructor._load_code(open(code_filename, 'rb').read())
+            native2 = None
             try:
-                if native1 == native2:
-                    continue
-            except TypeError:
-                pass
-            value1 = test_constructor._serialize_value(native1)
-            value2 = test_constructor._serialize_value(native2)
-            if verbose:
-                print "SERIALIZED NATIVE1:"
-                print value1
-                print "SERIALIZED NATIVE2:"
-                print value2
-            assert value1 == value2, (native1, native2)
-        finally:
-            if verbose:
-                print "NATIVE1:"
-                pprint.pprint(native1)
-                print "NATIVE2:"
-                pprint.pprint(native2)
-                print "OUTPUT:"
-                print output
+                output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
+                            allow_unicode=allow_unicode, encoding=encoding)
+                native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+                try:
+                    if native1 == native2:
+                        continue
+                except TypeError:
+                    pass
+                value1 = test_constructor._serialize_value(native1)
+                value2 = test_constructor._serialize_value(native2)
+                if verbose:
+                    print "SERIALIZED NATIVE1:"
+                    print value1
+                    print "SERIALIZED NATIVE2:"
+                    print value2
+                assert value1 == value2, (native1, native2)
+            finally:
+                if verbose:
+                    print "NATIVE1:"
+                    pprint.pprint(native1)
+                    print "NATIVE2:"
+                    pprint.pprint(native2)
+                    print "OUTPUT:"
+                    print output
 
 test_representer_types.unittest = ['.code']
 
diff --git a/tests/lib3/test_representer.py b/tests/lib3/test_representer.py
index c619d13..10d4a8f 100644
--- a/tests/lib3/test_representer.py
+++ b/tests/lib3/test_representer.py
@@ -6,33 +6,34 @@ import pprint
 def test_representer_types(code_filename, verbose=False):
     test_constructor._make_objects()
     for allow_unicode in [False, True]:
-        native1 = test_constructor._load_code(open(code_filename, 'rb').read())
-        native2 = None
-        try:
-            output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
-                        allow_unicode=allow_unicode)
-            native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+        for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']:
+            native1 = test_constructor._load_code(open(code_filename, 'rb').read())
+            native2 = None
             try:
-                if native1 == native2:
-                    continue
-            except TypeError:
-                pass
-            value1 = test_constructor._serialize_value(native1)
-            value2 = test_constructor._serialize_value(native2)
-            if verbose:
-                print("SERIALIZED NATIVE1:")
-                print(value1)
-                print("SERIALIZED NATIVE2:")
-                print(value2)
-            assert value1 == value2, (native1, native2)
-        finally:
-            if verbose:
-                print("NATIVE1:")
-                pprint.pprint(native1)
-                print("NATIVE2:")
-                pprint.pprint(native2)
-                print("OUTPUT:")
-                print(output)
+                output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
+                            allow_unicode=allow_unicode, encoding=encoding)
+                native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+                try:
+                    if native1 == native2:
+                        continue
+                except TypeError:
+                    pass
+                value1 = test_constructor._serialize_value(native1)
+                value2 = test_constructor._serialize_value(native2)
+                if verbose:
+                    print("SERIALIZED NATIVE1:")
+                    print(value1)
+                    print("SERIALIZED NATIVE2:")
+                    print(value2)
+                assert value1 == value2, (native1, native2)
+            finally:
+                if verbose:
+                    print("NATIVE1:")
+                    pprint.pprint(native1)
+                    print("NATIVE2:")
+                    pprint.pprint(native2)
+                    print("OUTPUT:")
+                    print(output)
 
 test_representer_types.unittest = ['.code']
 