summary refs log tree commit diff
diff options
context:
space:
mode:
author xi <xi@18f92427-320e-0410-9341-c67f048884a3> 2009-08-29 20:59:56 +0000
committer xi <xi@18f92427-320e-0410-9341-c67f048884a3> 2009-08-29 20:59:56 +0000
commit 884379d8e880f6a7cfbd76d695d697195a9ba7e7 (patch)
tree 9a389adbe8bc8db970f05663dbc120f1d2cfb83e
parent e0520e571eba024e4f41fb6511fa9dd1b7afdef4 (diff)
download pyyaml-884379d8e880f6a7cfbd76d695d697195a9ba7e7.tar.gz
Fixed emitting of invalid BOM for UTF-16.
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@351 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r-- lib/yaml/emitter.py 2
-rw-r--r-- lib3/yaml/emitter.py 2
-rw-r--r-- tests/data/utf16be.code 1
-rw-r--r-- tests/data/utf16be.data bin 0 -> 30 bytes
-rw-r--r-- tests/data/utf16le.code 1
-rw-r--r-- tests/data/utf16le.data bin 0 -> 30 bytes
-rw-r--r-- tests/data/utf8-implicit.code 1
-rw-r--r-- tests/data/utf8-implicit.data 1
-rw-r--r-- tests/data/utf8.code 1
-rw-r--r-- tests/data/utf8.data 1
-rw-r--r-- tests/lib/test_representer.py 53
-rw-r--r-- tests/lib3/test_representer.py 53
12 files changed, 62 insertions, 54 deletions
diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py
index f763b90..4cb2c8a 100644
--- a/lib/yaml/emitter.py
+++ b/lib/yaml/emitter.py
@@ -789,7 +789,7 @@ class Emitter(object):
def write_stream_start(self):
# Write BOM if needed.
if self.encoding and self.encoding.startswith('utf-16'):
- self.stream.write(u'\xFF\xFE'.encode(self.encoding))
+ self.stream.write(u'\uFEFF'.encode(self.encoding))
def write_stream_end(self):
self.flush_stream()
diff --git a/lib3/yaml/emitter.py b/lib3/yaml/emitter.py
index 5815909..fd55946 100644
--- a/lib3/yaml/emitter.py
+++ b/lib3/yaml/emitter.py
@@ -785,7 +785,7 @@ class Emitter:
def write_stream_start(self):
# Write BOM if needed.
if self.encoding and self.encoding.startswith('utf-16'):
- self.stream.write('\xFF\xFE'.encode(self.encoding))
+ self.stream.write('\uFEFF'.encode(self.encoding))
def write_stream_end(self):
self.flush_stream()
diff --git a/tests/data/utf16be.code b/tests/data/utf16be.code
new file mode 100644
index 0000000..c45b371
--- /dev/null
+++ b/tests/data/utf16be.code
@@ -0,0 +1 @@
+"UTF-16-BE"
diff --git a/tests/data/utf16be.data b/tests/data/utf16be.data
new file mode 100644
index 0000000..50dcfae
--- /dev/null
+++ b/tests/data/utf16be.data
Binary files differ
diff --git a/tests/data/utf16le.code b/tests/data/utf16le.code
new file mode 100644
index 0000000..400530a
--- /dev/null
+++ b/tests/data/utf16le.code
@@ -0,0 +1 @@
+"UTF-16-LE"
diff --git a/tests/data/utf16le.data b/tests/data/utf16le.data
new file mode 100644
index 0000000..76f5e73
--- /dev/null
+++ b/tests/data/utf16le.data
Binary files differ
diff --git a/tests/data/utf8-implicit.code b/tests/data/utf8-implicit.code
new file mode 100644
index 0000000..29326db
--- /dev/null
+++ b/tests/data/utf8-implicit.code
@@ -0,0 +1 @@
+"implicit UTF-8"
diff --git a/tests/data/utf8-implicit.data b/tests/data/utf8-implicit.data
new file mode 100644
index 0000000..9d8081e
--- /dev/null
+++ b/tests/data/utf8-implicit.data
@@ -0,0 +1 @@
+--- implicit UTF-8
diff --git a/tests/data/utf8.code b/tests/data/utf8.code
new file mode 100644
index 0000000..dcf11cc
--- /dev/null
+++ b/tests/data/utf8.code
@@ -0,0 +1 @@
+"UTF-8"
diff --git a/tests/data/utf8.data b/tests/data/utf8.data
new file mode 100644
index 0000000..686f48a
--- /dev/null
+++ b/tests/data/utf8.data
@@ -0,0 +1 @@
+--- UTF-8
diff --git a/tests/lib/test_representer.py b/tests/lib/test_representer.py
index f814705..a82a32a 100644
--- a/tests/lib/test_representer.py
+++ b/tests/lib/test_representer.py
@@ -6,33 +6,34 @@ import pprint
def test_representer_types(code_filename, verbose=False):
test_constructor._make_objects()
for allow_unicode in [False, True]:
- native1 = test_constructor._load_code(open(code_filename, 'rb').read())
- native2 = None
- try:
- output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
- allow_unicode=allow_unicode)
- native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+ for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']:
+ native1 = test_constructor._load_code(open(code_filename, 'rb').read())
+ native2 = None
try:
- if native1 == native2:
- continue
- except TypeError:
- pass
- value1 = test_constructor._serialize_value(native1)
- value2 = test_constructor._serialize_value(native2)
- if verbose:
- print "SERIALIZED NATIVE1:"
- print value1
- print "SERIALIZED NATIVE2:"
- print value2
- assert value1 == value2, (native1, native2)
- finally:
- if verbose:
- print "NATIVE1:"
- pprint.pprint(native1)
- print "NATIVE2:"
- pprint.pprint(native2)
- print "OUTPUT:"
- print output
+ output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
+ allow_unicode=allow_unicode, encoding=encoding)
+ native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+ try:
+ if native1 == native2:
+ continue
+ except TypeError:
+ pass
+ value1 = test_constructor._serialize_value(native1)
+ value2 = test_constructor._serialize_value(native2)
+ if verbose:
+ print "SERIALIZED NATIVE1:"
+ print value1
+ print "SERIALIZED NATIVE2:"
+ print value2
+ assert value1 == value2, (native1, native2)
+ finally:
+ if verbose:
+ print "NATIVE1:"
+ pprint.pprint(native1)
+ print "NATIVE2:"
+ pprint.pprint(native2)
+ print "OUTPUT:"
+ print output
test_representer_types.unittest = ['.code']
diff --git a/tests/lib3/test_representer.py b/tests/lib3/test_representer.py
index c619d13..10d4a8f 100644
--- a/tests/lib3/test_representer.py
+++ b/tests/lib3/test_representer.py
@@ -6,33 +6,34 @@ import pprint
def test_representer_types(code_filename, verbose=False):
test_constructor._make_objects()
for allow_unicode in [False, True]:
- native1 = test_constructor._load_code(open(code_filename, 'rb').read())
- native2 = None
- try:
- output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
- allow_unicode=allow_unicode)
- native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+ for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']:
+ native1 = test_constructor._load_code(open(code_filename, 'rb').read())
+ native2 = None
try:
- if native1 == native2:
- continue
- except TypeError:
- pass
- value1 = test_constructor._serialize_value(native1)
- value2 = test_constructor._serialize_value(native2)
- if verbose:
- print("SERIALIZED NATIVE1:")
- print(value1)
- print("SERIALIZED NATIVE2:")
- print(value2)
- assert value1 == value2, (native1, native2)
- finally:
- if verbose:
- print("NATIVE1:")
- pprint.pprint(native1)
- print("NATIVE2:")
- pprint.pprint(native2)
- print("OUTPUT:")
- print(output)
+ output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
+ allow_unicode=allow_unicode, encoding=encoding)
+ native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+ try:
+ if native1 == native2:
+ continue
+ except TypeError:
+ pass
+ value1 = test_constructor._serialize_value(native1)
+ value2 = test_constructor._serialize_value(native2)
+ if verbose:
+ print("SERIALIZED NATIVE1:")
+ print(value1)
+ print("SERIALIZED NATIVE2:")
+ print(value2)
+ assert value1 == value2, (native1, native2)
+ finally:
+ if verbose:
+ print("NATIVE1:")
+ pprint.pprint(native1)
+ print("NATIVE2:")
+ pprint.pprint(native2)
+ print("OUTPUT:")
+ print(output)
test_representer_types.unittest = ['.code']