summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohan Dahlin <johan@gnome.org>2010-12-02 14:10:33 -0200
committerJohan Dahlin <johan@gnome.org>2010-12-02 14:10:33 -0200
commitb6405089448ea588989faf1b4bff3aa96cd5c291 (patch)
treeb236aebc6f9593c9b121f7d2ba209c8a9dd0e527
parentaa8b7d2d0f586976ea7399d95e1ccce3000b4734 (diff)
downloadgobject-introspection-b6405089448ea588989faf1b4bff3aa96cd5c291.tar.gz
Add proper unicode support to the source scanner
The assumption is that the only allowed source encoding is utf-8. Always treat strings as unicode, and fix up the transformer and xml writer to properly output utf-8.
-rw-r--r--giscanner/giscannermodule.c42
-rw-r--r--giscanner/transformer.py2
-rwxr-xr-xgiscanner/xmlwriter.py2
3 files changed, 38 insertions, 8 deletions
diff --git a/giscanner/giscannermodule.c b/giscanner/giscannermodule.c
index a8061dbc..0f94240f 100644
--- a/giscanner/giscannermodule.c
+++ b/giscanner/giscannermodule.c
@@ -564,7 +564,7 @@ static int calc_attrs_length(PyObject *attributes, int indent,
for (i = 0; i < PyList_Size (attributes); ++i)
{
- PyObject *tuple;
+ PyObject *tuple, *pyvalue;
char *attr, *value;
char *escaped;
@@ -572,9 +572,24 @@ static int calc_attrs_length(PyObject *attributes, int indent,
if (PyTuple_GetItem(tuple, 1) == Py_None)
continue;
- if (!PyArg_ParseTuple(tuple, "ss", &attr, &value))
+ if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue))
return -1;
+ if (PyUnicode_Check(pyvalue)) {
+ PyObject *s = PyUnicode_AsUTF8String(pyvalue);
+ if (!s) {
+ return -1;
+ }
+ value = PyString_AsString(s);
+ Py_DECREF(s);
+ } else if (PyString_Check(pyvalue)) {
+ value = PyString_AsString(pyvalue);
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "value must be string or unicode");
+ return -1;
+ }
+
escaped = g_markup_escape_text (value, -1);
attr_length += 2 + strlen(attr) + strlen(escaped) + 2;
g_free(escaped);
@@ -605,7 +620,7 @@ pygi_collect_attributes (PyObject *self,
return NULL;
if (attributes == Py_None || !PyList_Size(attributes))
- return PyString_FromString("");
+ return PyUnicode_FromString("");
len = calc_attrs_length(attributes, indent, self_indent);
if (len < 0)
@@ -620,7 +635,7 @@ pygi_collect_attributes (PyObject *self,
for (i = 0; i < PyList_Size (attributes); ++i)
{
- PyObject *tuple;
+ PyObject *tuple, *pyvalue;
char *attr, *value, *escaped;
tuple = PyList_GetItem (attributes, i);
@@ -643,9 +658,24 @@ pygi_collect_attributes (PyObject *self,
continue;
/* this leaks, but we exit after, so */
- if (!PyArg_ParseTuple(tuple, "ss", &attr, &value))
+ if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue))
return NULL;
+ if (PyUnicode_Check(pyvalue)) {
+ PyObject *s = PyUnicode_AsUTF8String(pyvalue);
+ if (!s) {
+ return NULL;
+ }
+ value = PyString_AsString(s);
+ Py_DECREF(s);
+ } else if (PyString_Check(pyvalue)) {
+ value = PyString_AsString(pyvalue);
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "value must be string or unicode");
+ return NULL;
+ }
+
if (indent_len && !first)
{
g_string_append_c (attr_value, '\n');
@@ -663,7 +693,7 @@ pygi_collect_attributes (PyObject *self,
first = FALSE;
}
- return PyString_FromString (g_string_free (attr_value, FALSE));
+ return PyUnicode_FromString (g_string_free (attr_value, FALSE));
}
/* Module */
diff --git a/giscanner/transformer.py b/giscanner/transformer.py
index f07e8d17..4cd24484 100644
--- a/giscanner/transformer.py
+++ b/giscanner/transformer.py
@@ -608,7 +608,7 @@ raise ValueError."""
return None
if symbol.const_string is not None:
typeval = ast.TYPE_STRING
- value = symbol.const_string
+ value = unicode(symbol.const_string, 'utf-8')
elif symbol.const_int is not None:
typeval = ast.TYPE_INT
value = '%d' % (symbol.const_int, )
diff --git a/giscanner/xmlwriter.py b/giscanner/xmlwriter.py
index a418cc40..84c24c0f 100755
--- a/giscanner/xmlwriter.py
+++ b/giscanner/xmlwriter.py
@@ -120,7 +120,7 @@ class XMLWriter(object):
if indent:
self._data.write('%s%s%s' % (
self._indent_char * self._indent,
- line,
+ line.encode('utf-8'),
self._newline_char))
else:
self._data.write('%s%s' % (line, self._newline_char))