2 files changed, 199 insertions, 18 deletions
diff --git a/giscanner/docbookdescription.py b/giscanner/docbookdescription.py
new file mode 100644
index 00000000..70672ac7
--- /dev/null
+++ b/giscanner/docbookdescription.py
@@ -0,0 +1,185 @@
+
+TAG_PROGRAM_LISTING = '<programlisting'  # no '>': the scanner looks for the closing '>' itself
+TAG_CDATA = '<![CDATA['
+TAGS =  {TAG_PROGRAM_LISTING, TAG_CDATA, ']]>', '</programlisting>'}  # markers _find_xml_tag_matches searches for
+
+def get_formatted_description(description):
+    """Return description converted to DocBook markup inside <para> tags.
+
+    '|[' and ']|' become <informalexample><programlisting> pairs, blank lines
+    outside program listings and CDATA sections start a new <para>, and stray
+    '&', '<' and '>' outside CDATA sections are escaped.
+    """
+    desc = description.replace("|[", "<informalexample><programlisting>") \
+                          .replace("]|", "</programlisting></informalexample>")
+    desc = "<para>%s</para>" % desc
+
+    # TODO: '#include <xxxxx>' still needs handling, e.g. (Perl):
+    #    $text =~ s/#include(\s+)<([^>]+)>/#include$1&lt;$2&gt;/g;
+
+    pieces = []
+    inside_tags = []
+    last_offset = 0
+    # The trailing (len, len, None) entry sends the text after the last tag
+    # through the same paragraph splitting and escaping as all other text.
+    matches = list(_find_xml_tag_matches(desc)) + [(len(desc), len(desc), None)]
+    for start, end, tag in matches:
+        text = desc[last_offset:start]
+        if not inside_tags:
+            text = "\n</para>\n<para>\n".join(text.split('\n\n'))
+        if TAG_CDATA not in inside_tags:
+            text = _escape_non_cdata_section(text)
+        pieces.append(text)
+        last_offset = end
+        if tag is None:
+            break
+        # TAG_PROGRAM_LISTING lacks its '>', so it is added back here.
+        pieces.append(tag + '>' if tag == TAG_PROGRAM_LISTING else tag)
+        if tag in (TAG_CDATA, TAG_PROGRAM_LISTING):
+            inside_tags.append(tag)
+        elif inside_tags:
+            inside_tags.pop()
+        else:
+            print "Error: mismatched tag:", tag
+    return "".join(pieces)
+
+def _find_xml_tag_matches(string):
+    """Yield (start, end, tag) for each TAGS marker found in string, in order.
+
+    For TAG_PROGRAM_LISTING, end lies just past the '>' closing the opening tag.
+    """
+    offset = 0
+    while True:
+        hits = [(string.find(marker, offset), marker) for marker in TAGS]
+        hits = [hit for hit in hits if hit[0] != -1]
+        if not hits:
+            return
+        first, tag = min(hits)
+        end = first + len(tag)
+        if tag == TAG_PROGRAM_LISTING:
+            # Without a '>' find() gives -1; use the end of string, not offset 0.
+            close = string.find('>', end)
+            end = close + 1 if close != -1 else len(string)
+        offset = end
+        yield first, end, tag
+
+def _escape_non_cdata_section(string):
+    # Escape stray '&' first, then stray '<', then stray '>'.
+    without_lt = _escape_lt_not_in_xml_tag(_escape_ampersand_not_in_entity(string))
+    return _escape_gt_not_in_xml_tag(without_lt)
+
+def _escape_ampersand_not_in_entity(string):
+    """Escape every '&' in string that does not start an XML reference.
+
+    Named entities (&lt;) and decimal or hexadecimal character references
+    (&#60;, &#x3C;) are kept as they are; any other '&' becomes '&amp;'.
+    Entity names may contain digits after the first letter (&sup2;).
+    """
+    import re
+
+    # Text that may follow '&' in a reference: an alphanumeric name starting
+    # with a letter, '#' plus digits, or '#x' plus hex digits, then ';'.
+    reference = r'(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);'
+    return re.sub('&(?!%s)' % reference, '&amp;', string)
+
+def _is_valid_xml_tag_name(name):
+    # True for a letter followed by letters/digits; always returns a bool.
+    if not name:
+        return False
+    return name.isalpha() or (name[0].isalpha() and name[1:].isalnum())
+
+def _is_valid_xml_tag(string):
+    """Return True if string, the text between '<' and '>', looks like a tag.
+
+    Accepted forms are 'name', '/name' and 'name attr="value" ...', each
+    optionally followed by '/'.  Empty or malformed input gives False
+    instead of raising IndexError or ValueError.
+    """
+    # handle case where line end is between tag name and first argument.
+    # ie. <link\nlinkend="link-id">My Link</link>
+    string = string.replace('\n', ' ')
+    if string.endswith('/'):
+        string = string[:-1]
+
+    if string.startswith('/') and _is_valid_xml_tag_name(string[1:]):
+        return True  # valid end tag
+    if _is_valid_xml_tag_name(string):
+        return True  # valid start tag without attributes
+    if " " not in string:
+        return False
+
+    # we are looking for: <tagname arg="value" arg2="value2">
+    tagname, rest = string.split(" ", 1)
+    if not _is_valid_xml_tag_name(tagname):
+        return False
+
+    rest = rest.lstrip()
+    while rest:
+        argname, equals, rest = rest.partition('=')
+        if not equals or not _is_valid_xml_tag_name(argname):
+            return False
+        # the value must start with '"' and have a closing '"'
+        if not rest.startswith('"') or '"' not in rest[1:]:
+            return False
+        rest = rest[1:].split('"', 1)[1].lstrip()
+    return True
+
+def _escape_lt_not_in_xml_tag(string):
+    """Escape every '<' in string that does not open a well-formed tag.
+
+    A '<' is kept only when a '>' follows and the text in between satisfies
+    _is_valid_xml_tag; otherwise it is rewritten as '&lt;'.
+    """
+    chunks = string.split('<')
+    pieces = [chunks[0]]
+    for chunk in chunks[1:]:
+        close = chunk.find('>')
+        is_tag = close != -1 and _is_valid_xml_tag(chunk[:close])
+        pieces.append(("<" if is_tag else "&lt;") + chunk)
+    return "".join(pieces)
+
+def _escape_gt_not_in_xml_tag(string):
+    """Escape every '>' in string that does not close a well-formed tag.
+
+    A '>' is kept only when the text after the nearest preceding '<' in the
+    output built so far satisfies _is_valid_xml_tag; else it becomes '&gt;'.
+    """
+    segments = string.split('>')
+    result = segments[0]
+    for segment in segments[1:]:
+        opener = result.rfind('<')
+        closes_tag = opener != -1 and _is_valid_xml_tag(result[opener + 1:])
+        result += (">" if closes_tag else "&gt;") + segment
+    return result
+
+
+def test():
+    """Self-checks for the tag validators and the '<' escaping helper."""
+    # (expected, input) pairs, checked in order.
+    name_cases = [(True, 'a'), (True, 'refsect1'),
+                  (False, '1refsect'), (False, '1')]
+    for expected, name in name_cases:
+        assert bool(_is_valid_xml_tag_name(name)) == expected
+
+    tag_cases = [
+        (True, '/a'), (True, '/refsect1'), (False, '/1'), (True, 'link'),
+        (True, 'link linkend="value"'), (True, 'link  linkend="value"'),
+        (True, 'link/'), (True, 'link linkend="value"/'),
+        (True, 'link linkend="value" arg23="anothervalue"'),
+        (True, 'link linkend="value" arg23="anothervalue with spaces"'),
+        (False, 'link linkend="value arg23="anothervalue with spaces"'),
+        (False, 'link linkend'), (True, 'link\nlinkend="link-id"'),
+        (True, 'xref linkend="gtkstylecontext-classes"/'),
+        (True, 'a href="http://www.gtk.org" title="&lt;i&gt;Our&lt;/i&gt; website"'),
+        (True, 'ulink \nurl="http://www.freedesktop.org/Standards/wm-spec"'),
+    ]
+    for expected, inner in tag_cases:
+        assert bool(_is_valid_xml_tag(inner)) == expected
+
+    # a well-formed tag inside other text must come back unchanged
+    string = 'gtk_label_set_markup (label, "Go to the <a href="http://www.gtk.org" title="&lt;i&gt;Our&lt;/i&gt; website">GTK+ website</a> for more...");'
+    assert _escape_lt_not_in_xml_tag(string) == string
+
+if __name__ == '__main__':  # run the self-checks when executed as a script
+    test()
+
diff --git a/giscanner/docbookwriter.py b/giscanner/docbookwriter.py
index a170fe70..182b513d 100644
--- a/giscanner/docbookwriter.py
+++ b/giscanner/docbookwriter.py
@@ -25,6 +25,7 @@ import sys
 from . import ast
 from .girparser import GIRParser
 from .xmlwriter import XMLWriter
+from .docbookdescription import get_formatted_description
 
 XMLNS = "http://docbook.org/ns/docbook"
 XMLVERSION = "5.0"
@@ -467,23 +468,12 @@ class DocBookWriter(object):
                         for entity in page.get_signals():
                             self._formatter.render_signal(entity, link=True)
 
-            # if page.description:
-            #     with self._writer.tagcontext(
-            #         'refsect1',
-            #         [('id', '%s.description' % (page.name, )),
-            #          ]):
-            #         self._writer.write_tag(
-            #             "title", [("role", "desc.title")], "Description")
-            #         import cgi
-            #         desc = page.description
-            #         while True:
-            #             start = desc.find('|[')
-            #             if start == -1:
-            #                 break
-            #             end = desc.find(']|')
-            #             desc = desc[:start] + cgi.escape(desc[start+2:end]) + desc[end+2:]
-            #         desc = desc.replace("&", "&amp;")
-            #         self._writer.write_line(desc)
+            if page.description:
+                with self._writer.tagcontext('refsect1',
+                                            [('id', '%s.description' % (page.name, ))]):
+                    self._writer.write_tag(
+                        "title", [("role", "desc.title")], "Description")
+                    self._render_description(page.description)
 
             with self._writer.tagcontext('refsect1',
                                         [('id', "%s-details" % page.id.lower()),
@@ -555,7 +545,9 @@ class DocBookWriter(object):
         with self._writer.tagcontext("programlisting"):
             self._formatter.render_method(entity)
 
-        self._writer.write_tag("para", [], entity.get_ast().doc)
+        description = entity.get_ast().doc
+        if description:
+            self._render_description(entity.get_ast().doc)
 
         with self._writer.tagcontext("variablelist", [("role", "params")]):
             self._formatter.render_param_list(entity)
@@ -583,6 +575,10 @@ class DocBookWriter(object):
         self._writer.write_line("\n".join(lines))
         self._writer.enable_whitespace()
 
+    def _render_description(self, description):
+        # Format the description text and hand the result to the writer.
+        self._writer.write_line(get_formatted_description(description))
+
     def _get_parent_chain(self, page_node):
         parent_chain = []