#!/usr/bin/python3 # canonicalize html dirs to ease comparing them # # run as: # ./tools/c10e-html html import argparse import glob import re import os import sys from bs4 import BeautifulSoup def prettify(filename, parser='lxml', fixup=False): with open(filename, 'r') as doc: soup = BeautifulSoup(doc.read(), parser) with open(filename, 'w') as doc: html = soup.prettify() if fixup: # strip things that mkhtml2 is not producing to reduce the diff html = html.replace('a class="link" href', 'a href') html = html.replace(' target="_top"', '') html = html.replace('summary="Navigation header" ', '') html = html.replace(""" """, '') html = re.sub("""
""", '', html) html = re.sub(r'\s*\s*
', '', html) html = re.sub(r'\s*\s*', '', html) html = re.sub(r'\s*