#!/usr/bin/env python3
# -*- Mode: Python; py-indent-offset: 4 -*-
#
# This litte script outputs the C doc comments to an XML format.
# So far it's only used by gtkmm (The C++ bindings). Murray Cumming.
# Usage example:
# # ./docextract_to_xml.py -s /gnome/head/cvs/gtk+/gtk/ -s /gnome/head/cvs/gtk+/docs/reference/gtk/tmpl/ > gtk_docs.xml
import getopt
import re
import sys
import docextract
def usage():
sys.stderr.write('usage: docextract_to_xml.py ' +
'[-s /src/dir | --source-dir=/src/dir] ' +
'[-x /src/dir/file-to-exclude | --exclude-file=/src/dir/file-to-exclude] ' +
'[-a | --with-annotations] [-p | --with-properties] ' +
'[-c | --with-sections] [-r | --no-recursion] ' +
'[-n | --no-since] [-i | --no-signals ] [-e | --no-enums ]\n')
sys.exit(1)
# Translates special texts to &... HTML acceptable format. Also replace
# occurrences of '/*' and '*/' with '/ *' and '* /' respectively to avoid
# comment errors (note the spaces). Some function descriptions include C++
# multi-line comments which cause errors when the description is included in a
# C++ Doxygen comment block.
def escape_text(unescaped_text):
# Escape every "&" not part of an entity reference
escaped_text = re.sub(r'&(?![A-Za-z]+;)', '&', unescaped_text)
# These weird entities turn up in the output...
escaped_text = escaped_text.replace('—', '—')
escaped_text = escaped_text.replace('*', '*')
escaped_text = escaped_text.replace('%', '%')
escaped_text = escaped_text.replace('@', '@')
escaped_text = escaped_text.replace(':', ':')
escaped_text = escaped_text.replace('#', '#')
escaped_text = escaped_text.replace(' ', ' ')
escaped_text = escaped_text.replace('&solidus;', '/')
escaped_text = escaped_text.replace('π', '∏')
escaped_text = escaped_text.replace('⇒', '⇒')
# This represents a '/' before or after an '*' so replace with slash but
# with spaces.
escaped_text = escaped_text.replace('/', ' / ')
# Escape for both tag contents and attribute values
escaped_text = escaped_text.replace('<', '<')
escaped_text = escaped_text.replace('>', '>')
escaped_text = escaped_text.replace('"', '"')
# Replace C++ comment begin and ends to ones that don't affect Doxygen.
escaped_text = escaped_text.replace('/*', '/ *')
escaped_text = escaped_text.replace('*/', '* /')
return escaped_text
def print_annotations(annotations):
for annotation in annotations:
print("" + \
escape_text(annotation[1]) + "")
if __name__ == '__main__':
try:
opts, args = getopt.getopt(sys.argv[1:], "s:x:apcrnie",
["source-dir=", "exclude-file=",
"with-annotations", "with-properties",
"with-sections", "no-recursion", "no-since",
"no-signals", "no-enums"])
except getopt.error as e:
sys.stderr.write('docextract_to_xml.py: %s\n' % e)
usage()
source_dirs = []
exclude_files = []
with_annotations = False
with_signals = True
with_properties = False
with_sections = False
with_enums = True
for opt, arg in opts:
if opt in ('-s', '--source-dir'):
source_dirs.append(arg)
elif opt in ('-x', '--exclude-file'):
exclude_files.append(arg)
elif opt in ('-a', '--with-annotations'):
with_annotations = True
elif opt in ('-p', '--with-properties'):
with_properties = True
elif opt in ('-c', '--with-sections'):
with_sections = True
elif opt in ('-r', '--no-recursion'):
docextract.no_recursion = True
elif opt in ('-n', '--no-since'):
docextract.no_since = True
elif opt in ('-i', '--no-signals'):
with_signals = False
elif opt in ('-e', '--no-enums'):
with_enums = False
if len(args) != 0:
usage()
docs = docextract.extract(source_dirs, exclude_files);
docextract.extract_tmpl(source_dirs, exclude_files, docs); #Try the tmpl sgml files too.
# print d.docs
if docs:
print("")
for name, value in sorted(docs.items()):
# Get the type of comment block ('function', 'signal',
# 'property', 'section' or 'enum') (the value is a GtkDoc).
block_type = value.get_type()
# Skip signals if the option was not specified.
if block_type == 'signal' and not with_signals:
continue
# Likewise for properties.
elif block_type == 'property' and not with_properties:
continue
# Likewise for sections.
elif block_type == 'section':
if not with_sections:
continue
# Delete 'SECTION:' from the name.
# (It could easily be deleted by docextract.extract(), but then
# there would be a theoretical risk that a section name would
# be identical to a function name, when all kinds of elements
# are stored in the docs dictionary with their names as key.)
last_colon_pos = name.rfind(':')
if last_colon_pos >= 0:
name = name[last_colon_pos+1:]
# Likewise for enums.
elif block_type == 'enum' and not with_enums:
continue
print("<" + block_type + " name=\"" + escape_text(name) + "\">")
print("")
print(escape_text(value.get_description()))
print("")
# Loop through the parameters if not dealing with a property:
if block_type != 'property':
print("")
for name, description, annotations in value.params:
print("")
print("" + escape_text(description) + "")
if with_annotations:
print_annotations(annotations)
print("")
print("")
if block_type not in ('property', 'section', 'enum'):
# Show the return-type if not dealing with a property, section
# or enum:
if with_annotations:
print("")
print("" + escape_text(value.ret[0]) + \
"")
print_annotations(value.ret[1])
print("")
else:
print("" + escape_text(value.ret[0]) + "")
if with_annotations:
print_annotations(value.get_annotations())
print("" + block_type + ">\n")
print("")