summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christoph Reiter <reiter.christoph@gmail.com> 2018-07-27 14:58:14 +0200
committer Christoph Reiter <reiter.christoph@gmail.com> 2018-07-27 15:03:52 +0200
commit 90e98240c854fb8ee491919794d65fa431e78c01 (patch)
tree de85ed3679218b655137d7e87674b19349926432
parent dc4c2f9d2db5563ad482a09d04aeeab6972c53ac (diff)
download gobject-introspection-90e98240c854fb8ee491919794d65fa431e78c01.tar.gz
xmlwriter: move collect_attributes() back to a Python implementation
This reverts f345916405d94829696985 and related commits. The reverted commit states that both versions are about the same in performance, but the C version is more code and harder to maintain. It also states that the behaviour regarding invalid control characters is better with the C version, which produces entities. But those entities make any Python XML parser fail, which, given that most of our tooling is Python, doesn't seem better to me; see #135.
-rw-r--r-- giscanner/giscannermodule.c 160
-rwxr-xr-x giscanner/xmlwriter.py 44
-rw-r--r-- tests/scanner/test_xmlwriter.py 13
3 files changed, 50 insertions, 167 deletions
diff --git a/giscanner/giscannermodule.c b/giscanner/giscannermodule.c
index 9148960f..7f31eba0 100644
--- a/giscanner/giscannermodule.c
+++ b/giscanner/giscannermodule.c
@@ -677,171 +677,15 @@ static const PyMethodDef _PyGISourceScanner_methods[] = {
{ NULL, NULL, 0 }
};
-
-static int calc_attrs_length(PyObject *attributes, int indent,
- int self_indent)
-{
- int attr_length = 0;
- int i;
-
- if (indent == -1)
- return -1;
-
- for (i = 0; i < PyList_Size (attributes); ++i)
- {
- PyObject *tuple, *pyvalue;
- PyObject *s = NULL;
- char *attr, *value;
- char *escaped;
-
- tuple = PyList_GetItem (attributes, i);
- if (PyTuple_GetItem(tuple, 1) == Py_None)
- continue;
-
- if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue))
- return -1;
-
- if (PyUnicode_Check(pyvalue)) {
- s = PyUnicode_AsUTF8String(pyvalue);
- if (!s) {
- return -1;
- }
- value = PyBytes_AsString(s);
- } else if (PyBytes_Check(pyvalue)) {
- value = PyBytes_AsString(pyvalue);
- } else {
- PyErr_SetString(PyExc_TypeError,
- "value must be string or unicode");
- return -1;
- }
-
- escaped = g_markup_escape_text (value, -1);
- attr_length += 2 + strlen(attr) + strlen(escaped) + 2;
- g_free(escaped);
- Py_XDECREF(s);
- }
-
- return attr_length + indent + self_indent;
-}
-
-/* Hall of shame, wasted time debugging the code below
- * 20min - Johan 2009-02-19
- */
-static PyObject *
-pygi_collect_attributes (PyObject *self,
- PyObject *args)
-{
- char *tag_name;
- PyObject *attributes;
- int indent, indent_len, i, j, self_indent;
- char *indent_char;
- gboolean first;
- GString *attr_value = NULL;
- int len;
- PyObject *result = NULL;
-
- if (!PyArg_ParseTuple(args, "sO!isi",
- &tag_name, &PyList_Type, &attributes,
- &self_indent, &indent_char,
- &indent))
- return NULL;
-
- if (attributes == Py_None || !PyList_Size(attributes))
- return PyUnicode_DecodeUTF8("", 0, "strict");
-
- len = calc_attrs_length(attributes, indent, self_indent);
- if (len < 0)
- return NULL;
- if (len > 79)
- indent_len = self_indent + strlen(tag_name) + 1;
- else
- indent_len = 0;
-
- first = TRUE;
- attr_value = g_string_new ("");
-
- for (i = 0; i < PyList_Size (attributes); ++i)
- {
- PyObject *tuple, *pyvalue;
- PyObject *s = NULL;
- char *attr, *value, *escaped;
-
- tuple = PyList_GetItem (attributes, i);
-
- if (!PyTuple_Check (tuple))
- {
- PyErr_SetString(PyExc_TypeError,
- "attribute item must be a tuple");
- goto out;
- }
-
- if (PyTuple_Size (tuple) != 2)
- {
- PyErr_SetString(PyExc_IndexError,
- "attribute item must be a tuple of length 2");
- goto out;
- }
-
- if (PyTuple_GetItem(tuple, 1) == Py_None)
- continue;
-
- /* this leaks, but we exit after, so */
- if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue))
- goto out;
-
- if (PyUnicode_Check(pyvalue)) {
- s = PyUnicode_AsUTF8String(pyvalue);
- if (!s)
- goto out;
- value = PyBytes_AsString(s);
- } else if (PyBytes_Check(pyvalue)) {
- value = PyBytes_AsString(pyvalue);
- } else {
- PyErr_SetString(PyExc_TypeError,
- "value must be string or unicode");
- goto out;
- }
-
- if (indent_len && !first)
- {
- g_string_append_c (attr_value, '\n');
- for (j = 0; j < indent_len; j++)
- g_string_append_c (attr_value, ' ');
- }
- g_string_append_c (attr_value, ' ');
- g_string_append (attr_value, attr);
- g_string_append_c (attr_value, '=');
- g_string_append_c (attr_value, '\"');
- escaped = g_markup_escape_text (value, -1);
- g_string_append (attr_value, escaped);
- g_string_append_c (attr_value, '\"');
- if (first)
- first = FALSE;
- Py_XDECREF(s);
- }
-
- result = PyUnicode_DecodeUTF8 (attr_value->str, attr_value->len, "strict");
- out:
- if (attr_value != NULL)
- g_string_free (attr_value, TRUE);
- return result;
-}
-
/* Module */
-static PyMethodDef pyscanner_functions[] = {
- { "collect_attributes",
- (PyCFunction) pygi_collect_attributes, METH_VARARGS },
- { NULL, NULL, 0, NULL }
-};
-
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
NULL, /* m_name */
NULL, /* m_doc */
0,
- pyscanner_functions,
+ NULL,
NULL
};
#endif /* PY_MAJOR_VERSION >= 3 */
@@ -864,7 +708,7 @@ MOD_INIT(_giscanner)
moduledef.m_name = module_name;
m = PyModule_Create (&moduledef);
#else
- m = Py_InitModule (module_name, (PyMethodDef*)pyscanner_functions);
+ m = Py_InitModule (module_name, NULL);
#endif
d = PyModule_GetDict (m);
diff --git a/giscanner/xmlwriter.py b/giscanner/xmlwriter.py
index f79362a1..a65fc40c 100755
--- a/giscanner/xmlwriter.py
+++ b/giscanner/xmlwriter.py
@@ -28,9 +28,7 @@ import os
import sys
from contextlib import contextmanager
-from xml.sax.saxutils import escape
-
-from .libtoolimporter import LibtoolImporter
+from xml.sax.saxutils import escape, quoteattr
if sys.version_info.major < 3:
from StringIO import StringIO
@@ -39,11 +37,43 @@ else:
unicode = str
-with LibtoolImporter(None, None):
- if 'UNINSTALLED_INTROSPECTION_SRCDIR' in os.environ:
- from _giscanner import collect_attributes
+def _calc_attrs_length(attributes, indent, self_indent):
+ if indent == -1:
+ return -1
+ attr_length = 0
+ for attr, value in attributes:
+ # FIXME: actually, if we have attributes with None as value this
+ # should be considered a bug and raise an error. We are just
+ # ignoring them here while we fix GIRParser to create the right
+ # ast with the correct attributes.
+ if value is None:
+ continue
+ attr_length += 2 + len(attr) + len(quoteattr(value))
+ return attr_length + indent + self_indent
+
+
+def collect_attributes(tag_name, attributes, self_indent, self_indent_char, indent=-1):
+ if not attributes:
+ return ''
+ if _calc_attrs_length(attributes, indent, self_indent) > 79:
+ indent_len = self_indent + len(tag_name) + 1
else:
- from giscanner._giscanner import collect_attributes
+ indent_len = 0
+ first = True
+ attr_value = ''
+ for attr, value in attributes:
+ # FIXME: actually, if we have attributes with None as value this
+ # should be considered a bug and raise an error. We are just
+ # ignoring them here while we fix GIRParser to create the right
+ # ast with the correct attributes.
+ if value is None:
+ continue
+ if indent_len and not first:
+ attr_value += '\n%s' % (self_indent_char * indent_len)
+ attr_value += ' %s=%s' % (attr, quoteattr(value))
+ if first:
+ first = False
+ return attr_value
def build_xml_tag(tag_name, attributes=None, data=None, self_indent=0,
diff --git a/tests/scanner/test_xmlwriter.py b/tests/scanner/test_xmlwriter.py
index e24923bd..c6748c00 100644
--- a/tests/scanner/test_xmlwriter.py
+++ b/tests/scanner/test_xmlwriter.py
@@ -40,10 +40,16 @@ class TestXMLWriter(unittest.TestCase):
self.assertEqual(res, '<tag attr="utf8"/>')
res = build_xml_tag('tag', [('attr', 'foo\nbar')])
- self.assertEqual(res, '<tag attr="foo\nbar"/>')
+ self.assertEqual(res, '<tag attr="foo&#10;bar"/>')
+
+ res = build_xml_tag('tag', [('attr', 'foo\tbar')])
+ self.assertEqual(res, '<tag attr="foo&#9;bar"/>')
res = build_xml_tag('tag', [('attr', '\004')])
- self.assertEqual(res, '<tag attr="&#x4;"/>')
+ self.assertEqual(res, '<tag attr="\x04"/>')
+
+ res = build_xml_tag('tag', [('attr', 'limba1\t\034')])
+ self.assertEqual(res, '<tag attr="limba1&#9;\034"/>')
res = build_xml_tag('tag', [('attr', '')])
self.assertEqual(res, '<tag attr=""/>')
@@ -57,6 +63,9 @@ class TestXMLWriter(unittest.TestCase):
res = build_xml_tag('tag', [('a', 'b'), ('c', 'd')])
self.assertEqual(res, '<tag a="b" c="d"/>')
+ res = build_xml_tag('tag', [('foo', None), ('bar', 'quux')])
+ self.assertEqual(res, '<tag bar="quux"/>')
+
def test_build_xml_tag_data(self):
res = build_xml_tag('tag', [], b'foo')
self.assertEqual(res, '<tag>foo</tag>')