diff options
-rw-r--r-- | giscanner/docbookdescription.py | 185 | ||||
-rw-r--r-- | giscanner/docbookwriter.py | 32 |
2 files changed, 199 insertions, 18 deletions
# ---------------------------------------------------------------------------
# giscanner/docbookdescription.py
#
# Recovered from a flattened patch listing. The header for this file read:
#   diff --git a/giscanner/docbookdescription.py
#              b/giscanner/docbookdescription.py
#   new file mode 100644
#   index 00000000..70672ac7
#   --- /dev/null
#   +++ b/giscanner/docbookdescription.py
#   @@ -0,0 +1,185 @@
# ---------------------------------------------------------------------------
"""Turn a free-form description string into DocBook-flavoured XML text.

The public entry point is get_formatted_description().  It wraps the text
in <para>, expands the ``|[ ... ]|`` shorthand into
<informalexample><programlisting>, splits blank-line separated paragraphs
and escapes every ``&``, ``<`` and ``>`` that is not part of an entity
reference or of something that looks like an XML tag.
"""

import re

TAG_PROGRAM_LISTING = '<programlisting'
TAG_CDATA = '<![CDATA['
TAGS = {TAG_PROGRAM_LISTING, TAG_CDATA, ']]>', '</programlisting>'}

# Single alternation over every marker in TAGS so that one scan finds the
# leftmost marker.  No two markers can match at the same position, so the
# order of the alternatives does not matter; sorted() only keeps the
# pattern deterministic.
_TAG_RE = re.compile('|'.join(re.escape(tag) for tag in sorted(TAGS)))

# Body of a numeric character reference, without the '&' and ';'.
_CHAR_REF_RE = re.compile(r'#(?:[0-9]+|x[0-9A-Fa-f]+)\Z')

_PARAGRAPH_BREAK = "\n</para>\n<para>\n"


def get_formatted_description(description):
    """Return *description* converted to escaped DocBook markup.

    ``|[`` and ``]|`` become an <informalexample><programlisting> pair and
    the whole text is wrapped in <para>...</para>.  The text is then cut
    at the markers listed in TAGS:

    * outside any programlisting/CDATA section, blank lines ('\\n\\n')
      become paragraph breaks;
    * everywhere except inside a CDATA section, stray '&', '<' and '>'
      are escaped.

    A closing marker without a matching opening one is reported on
    standard output and otherwise ignored.
    """
    desc = description.replace("|[", "<informalexample><programlisting>")
    desc = desc.replace("]|", "</programlisting></informalexample>")
    desc = "<para>%s</para>" % desc

    # we still need to handle this case
    #     # Handle "#include <xxxxx>"
    #     $text =~ s/#include(\s+)<([^>]+)>/#include$1&lt;$2&gt;/g;

    chunks = []
    inside_tags = []
    last_offset = 0
    for start, end, tag in _find_xml_tag_matches(desc):
        chunks.append(_format_text_segment(desc[last_offset:start],
                                           inside_tags))
        chunks.append(tag)
        if tag == TAG_PROGRAM_LISTING:
            # The match spans '<programlisting ...>' but only the bare
            # marker is yielded, so the element is closed here.
            # NOTE(review): any attributes on the tag are dropped.
            chunks.append('>')

        if tag in (TAG_CDATA, TAG_PROGRAM_LISTING):
            inside_tags.append(tag)
        elif inside_tags:
            inside_tags.pop()
        else:
            # Single-argument form prints the same line on Python 2 and 3.
            print("Error: mismatched tag: %s" % tag)
        last_offset = end

    # The tail gets the same treatment as every other segment; otherwise
    # text without any marker would never be split into paragraphs.
    chunks.append(_format_text_segment(desc[last_offset:], inside_tags))
    return "".join(chunks)


def _format_text_segment(text, inside_tags):
    """Format the plain text found between two markers.

    *inside_tags* is the stack of currently open markers.
    """
    if not inside_tags:
        text = _PARAGRAPH_BREAK.join(text.split('\n\n'))
    if TAG_CDATA not in inside_tags:
        text = _escape_non_cdata_section(text)
    return text


def _find_xml_tag_matches(string):
    """Yield (start, end, tag) for each TAGS marker in *string*, in order.

    For TAG_PROGRAM_LISTING, *end* is just past the '>' that closes the
    start tag.  If there is no such '>', *end* is the end of the marker
    itself, which guarantees the scan always advances.
    """
    offset = 0
    while True:
        match = _TAG_RE.search(string, offset)
        if match is None:
            return
        tag = match.group()
        end = match.end()
        if tag == TAG_PROGRAM_LISTING:
            close = string.find('>', end)
            if close != -1:
                end = close + 1
        offset = end
        yield match.start(), end, tag


def _escape_non_cdata_section(string):
    """Escape stray '&', '<' and '>' in *string*.

    Ampersands go first so that the entities produced by the other two
    steps are not escaped a second time.
    """
    string = _escape_ampersand_not_in_entity(string)
    string = _escape_lt_not_in_xml_tag(string)
    return _escape_gt_not_in_xml_tag(string)


def _is_entity_body(body):
    """Tell whether '&' + *body* + ';' is an entity or character reference."""
    return body.isalpha() or _CHAR_REF_RE.match(body) is not None


def _escape_ampersand_not_in_entity(string):
    """Replace each '&' that does not start a reference with '&amp;'."""
    parts = string.split('&')

    output = [parts[0]]
    for part in parts[1:]:
        end = part.find(';')
        if end != -1 and _is_entity_body(part[:end]):
            output.append("&")
        else:
            output.append("&amp;")
        output.append(part)

    return "".join(output)


def _is_valid_xml_tag_name(name):
    """Tell whether *name* is alphanumeric and starts with a letter."""
    return bool(name) and name[0].isalpha() and name.isalnum()


def _is_valid_xml_tag(string):
    """Tell whether *string* looks like the inside of an XML tag.

    *string* is the text between '<' and '>'.  Accepted forms are an end
    tag ('/name'), a bare start tag ('name'), and a start tag followed by
    space-separated name="value" attributes; each form may carry a
    trailing '/'.  Malformed input yields False rather than an exception.
    """
    # handle case where line end is between tag name and first argument.
    # ie. <link\nlinkend="link-id">My Link</link>
    string = string.replace('\n', ' ')

    if string.endswith('/'):
        string = string[:-1]
    if not string:
        # '<>' or '</>' in the input
        return False

    if string[0] == '/':
        return _is_valid_xml_tag_name(string[1:])
    if _is_valid_xml_tag_name(string):
        # valid start tag without attributes
        return True
    if " " not in string:
        return False

    # we are looking for: <tagname arg="value" arg2="value2">
    tagname, rest = string.split(" ", 1)
    if not _is_valid_xml_tag_name(tagname):
        return False

    rest = rest.lstrip()
    while rest:
        argname, equals, rest = rest.partition('=')
        if not equals or not _is_valid_xml_tag_name(argname):
            return False
        if not rest.startswith('"'):
            return False
        # The value runs to the next quote, so it may contain spaces.
        _value, closing_quote, rest = rest[1:].partition('"')
        if not closing_quote:
            return False
        rest = rest.lstrip()

    return True


def _escape_lt_not_in_xml_tag(string):
    """Replace each '<' that does not open a plausible tag with '&lt;'."""
    parts = string.split('<')

    output = [parts[0]]
    for part in parts[1:]:
        end = part.find('>')
        if end != -1 and _is_valid_xml_tag(part[:end]):
            output.append("<")
        else:
            output.append("&lt;")
        output.append(part)

    return "".join(output)


def _escape_gt_not_in_xml_tag(string):
    """Replace each '>' that does not close a plausible tag with '&gt;'."""
    parts = string.split('>')

    # Kept as one string: each decision looks back at the text emitted so
    # far to find the '<' this '>' could belong to.
    output = parts[0]
    for part in parts[1:]:
        start = output.rfind('<')
        if start != -1 and _is_valid_xml_tag(output[start + 1:]):
            output += ">"
        else:
            output += "&gt;"
        output += part

    return output


def test():
    """Self-test for the helpers in this module."""
    assert _is_valid_xml_tag_name('a')
    assert _is_valid_xml_tag_name('refsect1')
    assert not _is_valid_xml_tag_name('1refsect')
    assert not _is_valid_xml_tag_name('1')
    assert _is_valid_xml_tag_name('') is False

    assert _is_valid_xml_tag('/a')
    assert _is_valid_xml_tag('/refsect1')
    assert not _is_valid_xml_tag('/1')
    assert _is_valid_xml_tag('link')
    assert _is_valid_xml_tag('link linkend="value"')
    assert _is_valid_xml_tag('link/')
    assert _is_valid_xml_tag('link linkend="value"/')
    assert _is_valid_xml_tag('link linkend="value" arg23="anothervalue"')
    assert _is_valid_xml_tag(
        'link linkend="value" arg23="anothervalue with spaces"')
    assert not _is_valid_xml_tag(
        'link linkend="value arg23="anothervalue with spaces"')
    assert not _is_valid_xml_tag('link linkend')
    assert _is_valid_xml_tag('link\nlinkend="link-id"')
    assert _is_valid_xml_tag('xref linkend="gtkstylecontext-classes"/')

    # Malformed input must be rejected, not raise.
    assert not _is_valid_xml_tag('')
    assert not _is_valid_xml_tag('/')
    assert not _is_valid_xml_tag('link linkend=')
    assert not _is_valid_xml_tag('link linkend="value')

    assert _is_valid_xml_tag(
        'a href="http://www.gtk.org" '
        'title="&lt;i&gt;Our&lt;/i&gt; website"')
    assert _is_valid_xml_tag(
        'ulink \nurl="http://www.freedesktop.org/Standards/wm-spec"')

    # Nested markup inside an attribute value is entity-encoded in the
    # input, so every '<' here opens a real tag and nothing is rewritten.
    string = ('gtk_label_set_markup (label, "Go to the '
              '<a href="http://www.gtk.org" '
              'title="&lt;i&gt;Our&lt;/i&gt; website">GTK+ website</a> '
              'for more...");')
    assert _escape_lt_not_in_xml_tag(string) == string

    assert (_escape_non_cdata_section('a < b && c > d')
            == 'a &lt; b &amp;&amp; c &gt; d')
    assert _escape_ampersand_not_in_entity('&amp; &#160; &x') == \
        '&amp; &#160; &amp;x'
    assert list(_find_xml_tag_matches('<programlisting')) == \
        [(0, 15, '<programlisting')]


if __name__ == '__main__':
    test()

# ---------------------------------------------------------------------------
# The patch listing continues with the changes to giscanner/docbookwriter.py:
#   diff --git a/giscanner/docbookwriter.py b/giscanner/docbookwriter.py
#   index a170fe70..182b513d 100644
#   --- a/giscanner/docbookwriter.py
#   +++ b/giscanner/docbookwriter.py
#   @@ -25,6 +25,7 @@ import sys
#    from .
import ast from .girparser import GIRParser from .xmlwriter import XMLWriter +from .docbookdescription import get_formatted_description XMLNS = "http://docbook.org/ns/docbook" XMLVERSION = "5.0" @@ -467,23 +468,12 @@ class DocBookWriter(object): for entity in page.get_signals(): self._formatter.render_signal(entity, link=True) - # if page.description: - # with self._writer.tagcontext( - # 'refsect1', - # [('id', '%s.description' % (page.name, )), - # ]): - # self._writer.write_tag( - # "title", [("role", "desc.title")], "Description") - # import cgi - # desc = page.description - # while True: - # start = desc.find('|[') - # if start == -1: - # break - # end = desc.find(']|') - # desc = desc[:start] + cgi.escape(desc[start+2:end]) + desc[end+2:] - # desc = desc.replace("&", "&") - # self._writer.write_line(desc) + if page.description: + with self._writer.tagcontext('refsect1', + [('id', '%s.description' % (page.name, ))]): + self._writer.write_tag( + "title", [("role", "desc.title")], "Description") + self._render_description(page.description) with self._writer.tagcontext('refsect1', [('id', "%s-details" % page.id.lower()), @@ -555,7 +545,9 @@ class DocBookWriter(object): with self._writer.tagcontext("programlisting"): self._formatter.render_method(entity) - self._writer.write_tag("para", [], entity.get_ast().doc) + description = entity.get_ast().doc + if description: + self._render_description(entity.get_ast().doc) with self._writer.tagcontext("variablelist", [("role", "params")]): self._formatter.render_param_list(entity) @@ -583,6 +575,10 @@ class DocBookWriter(object): self._writer.write_line("\n".join(lines)) self._writer.enable_whitespace() + def _render_description(self, description): + formatted_desc = get_formatted_description(description) + self._writer.write_line(formatted_desc) + def _get_parent_chain(self, page_node): parent_chain = [] |