diff options
author | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-05-06 22:09:50 +0000 |
---|---|---|
committer | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-05-06 22:09:50 +0000 |
commit | 1d1b0082a5a833264b0093b9ba41a3b0efd2a13e (patch) | |
tree | 1b6c1cf84038a6949cf3607c76f310fc4b1849d6 | |
parent | 01134b7d3c475a453ca26ebe320baee5b521decb (diff) | |
download | pyyaml-1d1b0082a5a833264b0093b9ba41a3b0efd2a13e.tar.gz |
Fix #11 (Thanks to edemaine(at)mit.edu).
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@153 18f92427-320e-0410-9341-c67f048884a3
mode | file | lines changed |
---|---|---|
-rw-r--r-- | lib/yaml/__init__.py | 2 |
-rw-r--r-- | lib/yaml/emitter.py | 11 |
-rw-r--r-- | lib/yaml/nodes.py | 25 |
-rw-r--r-- | tests/data/emitting-unacceptable-unicode-character-bug.code | 1 |
-rw-r--r-- | tests/data/emitting-unacceptable-unicode-character-bug.data | 1 |
-rw-r--r-- | tests/test_representer.py | 8 |
6 files changed, 29 insertions, 19 deletions
diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py index 10f6f35..c30973a 100644 --- a/lib/yaml/__init__.py +++ b/lib/yaml/__init__.py @@ -187,7 +187,7 @@ def safe_dump(data, stream=None, **kwds): """ return dump_all([data], stream, Dumper=SafeDumper, **kwds) -def add_implicit_detector(tag, regexp, first=None, +def add_implicit_resolver(tag, regexp, first=None, Loader=Loader, Dumper=Dumper): """ Add an implicit scalar detector. diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py index d06fde1..a34c452 100644 --- a/lib/yaml/emitter.py +++ b/lib/yaml/emitter.py @@ -697,12 +697,13 @@ class Emitter: if ch in u'\n\x85\u2028\u2029': line_breaks = True if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): - if ch < u'\x80' or ch == u'\uFEFF': # '\uFEFF' is BOM. - special_characters = True - else: + if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': unicode_characters = True if not self.allow_unicode: special_characters = True + else: + special_characters = True # Spaces, line breaks, and how they are mixed. State machine. @@ -961,7 +962,9 @@ class Emitter: ch = text[end] if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ or not (u'\x20' <= ch <= u'\x7E' - or (self.allow_unicode and ch > u'\x7F')): + or (self.allow_unicode + and (u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD'))): if start < end: data = text[start:end] self.column += len(data) diff --git a/lib/yaml/nodes.py b/lib/yaml/nodes.py index 1f9f094..cb8c1cb 100644 --- a/lib/yaml/nodes.py +++ b/lib/yaml/nodes.py @@ -7,18 +7,19 @@ class Node: self.end_mark = end_mark def __repr__(self): value = self.value - if isinstance(value, list): - if len(value) == 0: - value = '<empty>' - elif len(value) == 1: - value = '<1 item>' - else: - value = '<%d items>' % len(value) - else: - if len(value) > 75: - value = repr(value[:70]+u' ... 
') - else: - value = repr(value) + #if isinstance(value, list): + # if len(value) == 0: + # value = '<empty>' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) class ScalarNode(Node): diff --git a/tests/data/emitting-unacceptable-unicode-character-bug.code b/tests/data/emitting-unacceptable-unicode-character-bug.code new file mode 100644 index 0000000..4b92854 --- /dev/null +++ b/tests/data/emitting-unacceptable-unicode-character-bug.code @@ -0,0 +1 @@ +u"\udd00" diff --git a/tests/data/emitting-unacceptable-unicode-character-bug.data b/tests/data/emitting-unacceptable-unicode-character-bug.data new file mode 100644 index 0000000..4b92854 --- /dev/null +++ b/tests/data/emitting-unacceptable-unicode-character-bug.data @@ -0,0 +1 @@ +u"\udd00" diff --git a/tests/test_representer.py b/tests/test_representer.py index 3de0a28..e0fa894 100644 --- a/tests/test_representer.py +++ b/tests/test_representer.py @@ -6,12 +6,15 @@ from yaml import * class TestRepresenterTypes(test_appliance.TestAppliance): - def _testTypes(self, test_name, data_filename, code_filename): + def _testTypesUnicode(self, test_name, data_filename, code_filename): + return self._testTypes(test_name, data_filename, code_filename, allow_unicode=True) + + def _testTypes(self, test_name, data_filename, code_filename, allow_unicode=False): data1 = eval(file(code_filename, 'rb').read()) data2 = None output = None try: - output = dump(data1, Dumper=MyDumper) + output = dump(data1, Dumper=MyDumper, allow_unicode=allow_unicode) data2 = load(output, Loader=MyLoader) self.failUnlessEqual(type(data1), type(data2)) try: @@ -42,4 +45,5 @@ class TestRepresenterTypes(test_appliance.TestAppliance): raise TestRepresenterTypes.add_tests('testTypes', '.data', '.code') 
+TestRepresenterTypes.add_tests('testTypesUnicode', '.data', '.code') |