summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Colin Walters <walters@verbum.org> 2010-12-02 14:58:27 -0500
committer: Colin Walters <walters@verbum.org> 2010-12-02 14:58:27 -0500
commit: 805ed9f5150d8d202700d4be67af9d7068d8db5b (patch)
tree: 042383ebaf9cebc3a98bf0185ae69cafa45885d9
parent: b6405089448ea588989faf1b4bff3aa96cd5c291 (diff)
download: gobject-introspection-805ed9f5150d8d202700d4be67af9d7068d8db5b.tar.gz
scanner: More XML unicode fixes
Coerce input temporarily inside xmlwriter into Unicode to do string manipulation, and then only convert it to "UTF8str" when writing into the data buffer.
-rwxr-xr-x giscanner/xmlwriter.py 22
1 files changed, 13 insertions, 9 deletions
diff --git a/giscanner/xmlwriter.py b/giscanner/xmlwriter.py
index 84c24c0f..76880de0 100755
--- a/giscanner/xmlwriter.py
+++ b/giscanner/xmlwriter.py
@@ -95,10 +95,10 @@ class XMLWriter(object):
tag_name, attributes, self._indent,
self._indent_char,
len(tag_name) + 2)
- self.write_line('<%s%s>' % (tag_name, attrs))
+ self.write_line(u'<%s%s>' % (tag_name, attrs))
def _close_tag(self, tag_name):
- self.write_line('</%s>' % (tag_name, ))
+ self.write_line(u'</%s>' % (tag_name, ))
# Public API
@@ -113,17 +113,19 @@ class XMLWriter(object):
def get_xml(self):
return self._data.getvalue()
- def write_line(self, line='', indent=True, do_escape=False):
+ def write_line(self, line=u'', indent=True, do_escape=False):
+ if isinstance(line, str):
+ line = line.decode('utf-8')
+ assert isinstance(line, unicode)
if do_escape:
- line = escape(str(line))
-
+ line = escape(str(line)).decode('utf-8')
if indent:
self._data.write('%s%s%s' % (
self._indent_char * self._indent,
line.encode('utf-8'),
self._newline_char))
else:
- self._data.write('%s%s' % (line, self._newline_char))
+ self._data.write('%s%s' % (line.encode('utf-8'), self._newline_char))
def write_comment(self, text):
self.write_line('<!-- %s -->' % (text, ))
@@ -131,11 +133,13 @@ class XMLWriter(object):
def write_tag(self, tag_name, attributes, data=None):
if attributes is None:
attributes = []
- prefix = '<%s' % (tag_name, )
+ prefix = u'<%s' % (tag_name, )
if data is not None:
- suffix = '>%s</%s>' % (escape(data), tag_name)
+ if isinstance(data, str):
+ data = data.decode('UTF-8')
+ suffix = u'>%s</%s>' % (escape(data), tag_name)
else:
- suffix = '/>'
+ suffix = u'/>'
attrs = collect_attributes(
tag_name, attributes,
self._indent,