diff options
author | Johan Dahlin <johan@gnome.org> | 2010-12-02 14:10:33 -0200 |
---|---|---|
committer | Johan Dahlin <johan@gnome.org> | 2010-12-02 14:10:33 -0200 |
commit | b6405089448ea588989faf1b4bff3aa96cd5c291 (patch) | |
tree | b236aebc6f9593c9b121f7d2ba209c8a9dd0e527 | |
parent | aa8b7d2d0f586976ea7399d95e1ccce3000b4734 (diff) | |
download | gobject-introspection-b6405089448ea588989faf1b4bff3aa96cd5c291.tar.gz |
Add proper unicode support to the source scanner
The assumption is that the only allowed source encoding
is utf-8. Always treat strings as unicode and fix up the transformer
and xml writer to properly output utf-8.
-rw-r--r-- | giscanner/giscannermodule.c | 42 | ||||
-rw-r--r-- | giscanner/transformer.py | 2 | ||||
-rwxr-xr-x | giscanner/xmlwriter.py | 2 |
3 files changed, 38 insertions, 8 deletions
diff --git a/giscanner/giscannermodule.c b/giscanner/giscannermodule.c index a8061dbc..0f94240f 100644 --- a/giscanner/giscannermodule.c +++ b/giscanner/giscannermodule.c @@ -564,7 +564,7 @@ static int calc_attrs_length(PyObject *attributes, int indent, for (i = 0; i < PyList_Size (attributes); ++i) { - PyObject *tuple; + PyObject *tuple, *pyvalue; char *attr, *value; char *escaped; @@ -572,9 +572,24 @@ static int calc_attrs_length(PyObject *attributes, int indent, if (PyTuple_GetItem(tuple, 1) == Py_None) continue; - if (!PyArg_ParseTuple(tuple, "ss", &attr, &value)) + if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue)) return -1; + if (PyUnicode_Check(pyvalue)) { + PyObject *s = PyUnicode_AsUTF8String(pyvalue); + if (!s) { + return -1; + } + value = PyString_AsString(s); + Py_DECREF(s); + } else if (PyString_Check(pyvalue)) { + value = PyString_AsString(pyvalue); + } else { + PyErr_SetString(PyExc_TypeError, + "value must be string or unicode"); + return -1; + } + escaped = g_markup_escape_text (value, -1); attr_length += 2 + strlen(attr) + strlen(escaped) + 2; g_free(escaped); @@ -605,7 +620,7 @@ pygi_collect_attributes (PyObject *self, return NULL; if (attributes == Py_None || !PyList_Size(attributes)) - return PyString_FromString(""); + return PyUnicode_FromString(""); len = calc_attrs_length(attributes, indent, self_indent); if (len < 0) @@ -620,7 +635,7 @@ pygi_collect_attributes (PyObject *self, for (i = 0; i < PyList_Size (attributes); ++i) { - PyObject *tuple; + PyObject *tuple, *pyvalue; char *attr, *value, *escaped; tuple = PyList_GetItem (attributes, i); @@ -643,9 +658,24 @@ pygi_collect_attributes (PyObject *self, continue; /* this leaks, but we exit after, so */ - if (!PyArg_ParseTuple(tuple, "ss", &attr, &value)) + if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue)) return NULL; + if (PyUnicode_Check(pyvalue)) { + PyObject *s = PyUnicode_AsUTF8String(pyvalue); + if (!s) { + return NULL; + } + value = PyString_AsString(s); + Py_DECREF(s); + } else 
if (PyString_Check(pyvalue)) { + value = PyString_AsString(pyvalue); + } else { + PyErr_SetString(PyExc_TypeError, + "value must be string or unicode"); + return NULL; + } + if (indent_len && !first) { g_string_append_c (attr_value, '\n'); @@ -663,7 +693,7 @@ pygi_collect_attributes (PyObject *self, first = FALSE; } - return PyString_FromString (g_string_free (attr_value, FALSE)); + return PyUnicode_FromString (g_string_free (attr_value, FALSE)); } /* Module */ diff --git a/giscanner/transformer.py b/giscanner/transformer.py index f07e8d17..4cd24484 100644 --- a/giscanner/transformer.py +++ b/giscanner/transformer.py @@ -608,7 +608,7 @@ raise ValueError.""" return None if symbol.const_string is not None: typeval = ast.TYPE_STRING - value = symbol.const_string + value = unicode(symbol.const_string, 'utf-8') elif symbol.const_int is not None: typeval = ast.TYPE_INT value = '%d' % (symbol.const_int, ) diff --git a/giscanner/xmlwriter.py b/giscanner/xmlwriter.py index a418cc40..84c24c0f 100755 --- a/giscanner/xmlwriter.py +++ b/giscanner/xmlwriter.py @@ -120,7 +120,7 @@ class XMLWriter(object): if indent: self._data.write('%s%s%s' % ( self._indent_char * self._indent, - line, + line.encode('utf-8'), self._newline_char)) else: self._data.write('%s%s' % (line, self._newline_char)) |