summaryrefslogtreecommitdiff
path: root/giscanner
diff options
context:
space:
mode:
author: Laszlo Pandy <lpandy@src.gnome.org> 2011-08-14 11:51:04 +0200
committer: Laszlo Pandy <lpandy@src.gnome.org> 2011-08-16 19:50:29 +0200
commit: cfeb773b072259e720150559f29ffbd26bb70874 (patch)
tree: 3a031c271cabee4c12173fd40b4115c4289d7d24 /giscanner
parent: 169b206cbb4b347e4b17854e8f0c62a40404f803 (diff)
download: gobject-introspection-cfeb773b072259e720150559f29ffbd26bb70874.tar.gz
Descriptions support for docbook writer.
Includes support for escaping (but not double escaping) entities and CDATA sections. Add descriptions from GIR to docbook writer.
Diffstat (limited to 'giscanner')
-rw-r--r-- giscanner/docbookdescription.py 185
-rw-r--r-- giscanner/docbookwriter.py 32
2 files changed, 199 insertions, 18 deletions
diff --git a/giscanner/docbookdescription.py b/giscanner/docbookdescription.py
new file mode 100644
index 00000000..70672ac7
--- /dev/null
+++ b/giscanner/docbookdescription.py
@@ -0,0 +1,185 @@
+
# Opening markers of the two regions that get special treatment. The
# programlisting marker has no closing '>'; the scanner looks for the end
# of the start tag itself.
TAG_PROGRAM_LISTING = '<programlisting'
TAG_CDATA = '<![CDATA['

# Every marker the description scanner searches for: the two openers above
# plus the markers that close them.
TAGS = {
    TAG_PROGRAM_LISTING,
    TAG_CDATA,
    ']]>',
    '</programlisting>',
}
+
def get_formatted_description(description):
    """Convert a documentation description into DocBook markup.

    The ``|[`` and ``]|`` markers are replaced by
    ``<informalexample><programlisting>`` and its closing counterpart, and
    the whole text is wrapped in ``<para>``.  The text is then walked
    marker by marker (programlisting and CDATA openers/closers):

    * outside of any programlisting/CDATA region, blank lines (two
      consecutive newlines) become paragraph breaks;
    * outside of CDATA, text is passed through
      ``_escape_non_cdata_section``;
    * text inside CDATA is copied verbatim.

    A ``<programlisting ...>`` start tag is always emitted as a plain
    ``<programlisting>``; anything between the marker and its ``>`` is
    not carried over.

    :param description: the raw description string.
    :returns: the formatted DocBook string.
    """
    desc = description.replace("|[", "<informalexample><programlisting>")
    desc = desc.replace("]|", "</programlisting></informalexample>")
    desc = "<para>%s</para>" % desc

    # TODO: '#include <xxxxx>' lines are not handled specially yet.  The
    # reference Perl substitution for that case is:
    #   $text =~ s/#include(\s+)<([^>]+)>/#include$1&lt;$2&gt;/g;

    pieces = []
    # Stack of the opening markers we are currently inside of.
    inside_tags = []
    last_offset = 0

    def format_text(text):
        # Apply paragraph splitting and escaping according to the regions
        # that are open at this point of the scan.
        if not inside_tags:
            text = "\n</para>\n<para>\n".join(text.split('\n\n'))
        if TAG_CDATA not in inside_tags:
            text = _escape_non_cdata_section(text)
        return text

    for start, end, tag in _find_xml_tag_matches(desc):
        pieces.append(format_text(desc[last_offset:start]))
        pieces.append(tag)
        if tag == TAG_PROGRAM_LISTING:
            # The marker itself lacks the closing '>' of the start tag.
            pieces.append('>')

        if tag in (TAG_CDATA, TAG_PROGRAM_LISTING):
            inside_tags.append(tag)
        else:
            # A closing marker pops the innermost open region; it is not
            # checked that the two actually correspond.
            try:
                inside_tags.pop()
            except IndexError:
                print("Error: mismatched tag: %s" % tag)
        last_offset = end

    # The text after the last marker gets the same treatment as any other
    # text segment, so descriptions without markers are split into
    # paragraphs too and an unterminated CDATA section stays unescaped.
    pieces.append(format_text(desc[last_offset:]))
    return "".join(pieces)
+
def _find_xml_tag_matches(string):
    """Yield the markers listed in ``TAGS`` in order of appearance.

    Each item is a ``(start, end, tag)`` tuple where ``string[start:end]``
    is the matched text and ``tag`` is the entry of ``TAGS`` that matched.
    For ``TAG_PROGRAM_LISTING`` the match extends up to and including the
    next ``'>'``, so that a start tag with attributes is consumed as a
    whole.

    :param string: the text to scan.
    """
    offset = 0
    while True:
        candidates = []
        for tag in TAGS:
            pos = string.find(tag, offset)
            if pos != -1:
                candidates.append((tag, pos))

        if not candidates:
            return

        # The marker that occurs earliest in the remaining text wins.
        tag, first = min(candidates, key=lambda item: item[1])
        end = first + len(tag)
        if tag == TAG_PROGRAM_LISTING:
            closing = string.find('>', end)
            if closing != -1:
                end = closing + 1
            # With no '>' left, only the marker itself is consumed.  Using
            # the failed find() result would reset the scan offset to 0
            # and yield the same match forever.
        offset = end
        yield first, end, tag
+
def _escape_non_cdata_section(string):
    """Escape stray '&', '<' and '>' characters in *string*.

    The three escaping passes run in a fixed order: ampersands first,
    then '<', then '>'.
    """
    escaped = string
    for escape_pass in (_escape_ampersand_not_in_entity,
                        _escape_lt_not_in_xml_tag,
                        _escape_gt_not_in_xml_tag):
        escaped = escape_pass(escaped)
    return escaped
+
+def _escape_ampersand_not_in_entity(string):
+ parts = string.split('&')
+
+ output = parts[0]
+ for part in parts[1:]:
+ end = part.find(';')
+ if end == -1 or not part[:end].isalpha():
+ output += "&amp;"
+ else:
+ output += "&"
+ output += part
+
+ return output
+
+def _is_valid_xml_tag_name(name):
+ if len(name) < 1:
+ return False
+ elif name.isalpha() or (name[0].isalpha() and name[1:].isalnum()):
+ return True
+
def _is_valid_xml_tag(string):
    """Return True when *string* looks like the inside of an XML tag.

    *string* is the text between '<' and '>', without those two
    characters.  Accepted forms are an end tag (``/name``), a start tag
    without attributes (``name``) and a start tag followed by
    ``attr="value"`` pairs; each form may carry a trailing '/'.  Only
    double-quoted attribute values are recognised.

    Malformed input (empty text, a lone '/', an attribute without a value
    or with an unterminated value) gives False instead of raising.

    :param string: the candidate tag content.
    :returns: True or False.
    """
    # A line end may sit between the tag name and the first attribute,
    # ie. <link\nlinkend="link-id">My Link</link>
    string = string.replace('\n', ' ')

    # Drop the '/' of a self-closing tag.
    if string.endswith('/'):
        string = string[:-1]

    if not string:
        return False

    if string[0] == '/' and _is_valid_xml_tag_name(string[1:]):
        # valid end tag
        return True
    if _is_valid_xml_tag_name(string):
        # valid start tag without attributes
        return True
    if " " not in string:
        return False

    # We are looking for: <tagname arg="value" arg2="value2">
    tagname, rest = string.split(" ", 1)
    if not _is_valid_xml_tag_name(tagname):
        return False

    rest = rest.lstrip()
    while rest:
        if '=' not in rest:
            return False
        argname, rest = rest.split('=', 1)
        if not _is_valid_xml_tag_name(argname):
            return False
        if not rest.startswith('"'):
            return False
        # The value runs up to the next double quote; spaces inside the
        # quotes are part of the value.
        closing = rest.find('"', 1)
        if closing == -1:
            return False
        rest = rest[closing + 1:].lstrip()

    return True
+
def _escape_lt_not_in_xml_tag(string):
    """Replace every '<' that does not open a valid tag with '&lt;'.

    A '<' is kept only when a '>' follows it and the text between the two
    is accepted by ``_is_valid_xml_tag``.
    """
    pieces = string.split('<')

    result = [pieces[0]]
    for piece in pieces[1:]:
        gt_pos = piece.find('>')
        opens_tag = gt_pos != -1 and _is_valid_xml_tag(piece[:gt_pos])
        result.append("<" if opens_tag else "&lt;")
        result.append(piece)

    return "".join(result)
+
def _escape_gt_not_in_xml_tag(string):
    """Replace every '>' that does not close a valid tag with '&gt;'.

    For each '>' the text emitted so far is searched backwards for the
    nearest '<'; the '>' is kept only when the text after that '<' is
    accepted by ``_is_valid_xml_tag``.
    """
    pieces = string.split('>')

    result = pieces[0]
    for piece in pieces[1:]:
        lt_pos = result.rfind('<')
        closes_tag = lt_pos != -1 and _is_valid_xml_tag(result[lt_pos + 1:])
        if closes_tag:
            result += ">"
        else:
            result += "&gt;"
        result += piece

    return result
+
+
def test():
    """Run the self-checks for the tag validation and '<' escaping helpers.

    Raises AssertionError on the first check that fails.
    """
    name_cases = (
        ('a', True),
        ('refsect1', True),
        ('1refsect', False),
        ('1', False),
    )
    for name, expected in name_cases:
        assert bool(_is_valid_xml_tag_name(name)) == expected

    tag_cases = (
        ('/a', True),
        ('/refsect1', True),
        ('/1', False),
        ('link', True),
        ('link linkend="value"', True),
        ('link/', True),
        ('link linkend="value"/', True),
        ('link linkend="value" arg23="anothervalue"', True),
        ('link linkend="value" arg23="anothervalue with spaces"', True),
        ('link linkend="value arg23="anothervalue with spaces"', False),
        ('link linkend', False),
        ('link\nlinkend="link-id"', True),
        ('xref linkend="gtkstylecontext-classes"/', True),
        ('a href="http://www.gtk.org" title="&lt;i&gt;Our&lt;/i&gt; website"', True),
        ('ulink \nurl="http://www.freedesktop.org/Standards/wm-spec"', True),
    )
    for tag, expected in tag_cases:
        assert bool(_is_valid_xml_tag(tag)) == expected

    # Text whose only '<' characters open valid tags must come back unchanged.
    markup = 'gtk_label_set_markup (label, "Go to the <a href="http://www.gtk.org" title="&lt;i&gt;Our&lt;/i&gt; website">GTK+ website</a> for more...");'
    assert _escape_lt_not_in_xml_tag(markup) == markup
+
# Run the self-checks when this module is executed as a script.
if "__main__" == __name__:
    test()
+
diff --git a/giscanner/docbookwriter.py b/giscanner/docbookwriter.py
index a170fe70..182b513d 100644
--- a/giscanner/docbookwriter.py
+++ b/giscanner/docbookwriter.py
@@ -25,6 +25,7 @@ import sys
from . import ast
from .girparser import GIRParser
from .xmlwriter import XMLWriter
+from .docbookdescription import get_formatted_description
XMLNS = "http://docbook.org/ns/docbook"
XMLVERSION = "5.0"
@@ -467,23 +468,12 @@ class DocBookWriter(object):
for entity in page.get_signals():
self._formatter.render_signal(entity, link=True)
- # if page.description:
- # with self._writer.tagcontext(
- # 'refsect1',
- # [('id', '%s.description' % (page.name, )),
- # ]):
- # self._writer.write_tag(
- # "title", [("role", "desc.title")], "Description")
- # import cgi
- # desc = page.description
- # while True:
- # start = desc.find('|[')
- # if start == -1:
- # break
- # end = desc.find(']|')
- # desc = desc[:start] + cgi.escape(desc[start+2:end]) + desc[end+2:]
- # desc = desc.replace("&", "&amp;")
- # self._writer.write_line(desc)
+ if page.description:
+ with self._writer.tagcontext('refsect1',
+ [('id', '%s.description' % (page.name, ))]):
+ self._writer.write_tag(
+ "title", [("role", "desc.title")], "Description")
+ self._render_description(page.description)
with self._writer.tagcontext('refsect1',
[('id', "%s-details" % page.id.lower()),
@@ -555,7 +545,9 @@ class DocBookWriter(object):
with self._writer.tagcontext("programlisting"):
self._formatter.render_method(entity)
- self._writer.write_tag("para", [], entity.get_ast().doc)
+ description = entity.get_ast().doc
+ if description:
+ self._render_description(entity.get_ast().doc)
with self._writer.tagcontext("variablelist", [("role", "params")]):
self._formatter.render_param_list(entity)
@@ -583,6 +575,10 @@ class DocBookWriter(object):
self._writer.write_line("\n".join(lines))
self._writer.enable_whitespace()
def _render_description(self, description):
    """Format *description* for DocBook and write it to the output.

    The text is converted with ``get_formatted_description`` and handed
    to the writer as a single ``write_line`` call.
    """
    self._writer.write_line(get_formatted_description(description))
+
def _get_parent_chain(self, page_node):
parent_chain = []