#!/usr/bin/env python3

# doc_postprocess.py [-h|--help] <pattern>...
# Post-process the Doxygen-generated HTML files matching pattern.

import os
import sys
import re
import glob

# Substitutions with regular expressions are somewhat slow in Python 3.9.5.
# Use str.replace() rather than re.sub() where possible.

# [search string, compiled regular expression or None, substitution string, count]
class_el_patterns = [
    # return value
    [ ' &amp;&#160;', re.compile(r' &amp;&#160; *'), '&amp; ', 1],
    [ ' *&#160;', re.compile(r' \*&#160; *'), '* ', 1],
    # parameters
    [ ' &amp;', None, '&amp;', 0],
    [ '&amp;', re.compile(r'&amp;\b'), '&amp; ', 0],
    [ ' *', None, '*', 0],
    [ '*', re.compile(r'\*\b'), '* ', 0],
    # templates
    [ 'template&lt;', re.compile(r'\btemplate&lt;'), 'template &lt;', 1]
]

class_md_patterns = [
    # left parenthesis
    [ '(&#160;', re.compile(r'\(&#160; *'), '(', 1],
    # return value
    [ ' &amp; ', None, '&amp; ', 0],
    [ ' * ', None, '* ', 0],
    # parameters
    [ ' &amp;&#160;', re.compile(r' &amp;&#160; *'), '&amp; ', 0],
    [ ' *&#160;', re.compile(r' \*&#160; *'), '* ', 0],
    # templates
    [ 'template&lt;', re.compile(r'\btemplate&lt;'), 'template &lt;', 1]
]

else_patterns = [
    # template decls
    [ 'template&lt;', re.compile(r'^(<h\d>|)template&lt;'), '\\1template &lt;', 1]
]

all_lines_patterns = [
    # For some reason, some versions of Doxygen output the full path to
    # referenced tag files. This is bad since it breaks doc_install.py,
    # and also because it leaks local path names into source tarballs.
    # Thus, strip the directory prefix here.
    [ ' doxygen="', re.compile(r' doxygen="[^":]*/([^":]+\.tag):'), ' doxygen="\\1:', 0],
    [ '&copy;', None, '&#169;', 0],
    [ '&mdash;', None, '&#8212;', 0],
    [ '&ndash;', None, '&#8211;', 0],
    [ '&#160;', re.compile(r' *&#160; *'), '&#160;', 0]
]

def doc_postprocess(patterns):
    if not (isinstance(patterns, list) or isinstance(patterns, tuple)):
        patterns = [] if patterns is None else [patterns]

    filepaths = []
    for pattern in patterns:
        filepaths += glob.glob(pattern)

    for filepath in filepaths:
        # Assume that the file is UTF-8 encoded.
        # If illegal UTF-8 bytes in the range 0x80..0xff are encountered, they are
        # replaced by Unicode Private Use characters in the range 0xdc80..0xdcff
        # and restored to their original values when the file is rewritten.
        with open(filepath, mode='r', encoding='utf-8', errors='surrogateescape') as file:
            # Read the whole file into a buffer, a list with one line per element.
            buf = file.readlines()

        for line_number in range(len(buf)):
            line = buf[line_number]

            # Substitute
            if '