packaging/md2html


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

#!/usr/bin/python3

# Copyright (C) 2020 Wayne Davison
#
# This program is freely redistributable.

import re, argparse

# Page header written to each output file before the converted markdown.
# The single %s placeholder is filled in with the page title.
HTML_START = """\
<html><head>
<title>%s</title>
<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
<style>
body {
  max-width: 50em;
  margin: auto;
}
body, b, strong, u {
  font-family: 'Roboto', sans-serif;
}
code {
  font-family: 'Roboto Mono', monospace;
  font-weight: bold;
}
pre code {
  display: block;
  font-weight: normal;
}
blockquote pre code {
  background: #f1f1f1;
}
dd p:first-of-type {
  margin-block-start: 0em;
}
</style>
</head><body>
"""

# Page footer written to each output file after the converted markdown.
HTML_END = """\
</body></html>
"""

# The markdown-to-html function that main() calls.  It stays None until the
# __main__ section finds an importable markdown module and assigns one of the
# html_via_*() functions to it.
md_parser = None

def main():
    """Convert every markdown file named on the command line into an html file.

    Reads the module-level ``args`` (the parsed command line) and ``md_parser``
    (the markdown-to-html function chosen at startup).  Each ``NAME.md`` input
    is written out as ``NAME.html`` next to it, wrapped in HTML_START/HTML_END
    with ``NAME`` (minus any directory part) as the page title.  Inputs that do
    not end in ``.md`` are reported and skipped.
    """
    for mdfn in args.mdfiles:
        if not mdfn.endswith('.md'):
            print('Ignoring non-md input file:', mdfn)
            continue
        # Strip only the trailing ".md".  Using str.replace() here would also
        # rewrite any earlier ".md" in the path (e.g. "docs.md.d/page.md"),
        # yielding a wrong output filename and title.
        stem = mdfn[:-len('.md')]
        title = re.sub(r'.*/', '', stem)
        htfn = stem + '.html'

        print("Parsing", mdfn, '->', htfn)

        with open(mdfn, 'r', encoding='utf-8') as fh:
            txt = fh.read()

        # Tie a whitespace-delimited "--" to the preceding word with a
        # non-breaking space so that it cannot start a wrapped line.
        txt = re.sub(r'\s--\s', '\xa0-- ', txt)

        html = md_parser(txt)

        # Inside <code>...</code> turn all whitespace into non-breaking spaces.
        html = re.sub(r'(<code>)([\s\S]*?)(</code>)', lambda m: m[1] + re.sub(r'\s', '\xa0', m[2]) + m[3], html)
        # Turn "--" and any "-" that follows a non-breaking space into
        # non-breaking hyphens (&#8209;), then emit the remaining non-breaking
        # spaces as &nbsp; entities.
        html = html.replace('--', '&#8209;&#8209;').replace("\xa0-", '&nbsp;&#8209;').replace("\xa0", '&nbsp;')
        # Any other hyphen that follows a non-word character also becomes a
        # non-breaking hyphen.
        html = re.sub(r'(\W)-', r'\1&#8209;', html)

        with open(htfn, 'w', encoding='utf-8') as fh:
            fh.write(HTML_START % title)
            fh.write(html)
            fh.write(HTML_END)


def html_via_cmarkgfm(txt):
    """Return the html that cmarkgfm generates for the markdown text `txt`."""
    render = cmarkgfm.markdown_to_html
    return render(txt)


def html_via_commonmark(txt):
    """Return the html that commonmark generates for the markdown text `txt`."""
    renderer = commonmark.HtmlRenderer()
    tree = commonmark.Parser().parse(txt)
    return renderer.render(tree)


if __name__ == '__main__':
    # Script entry point: parse the command line, choose a markdown backend,
    # and then convert the requested files.
    parser = argparse.ArgumentParser(description='Output html for md pages.', add_help=False)
    parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
    parser.add_argument("mdfiles", nargs='+', help="The .md files to turn into .html files.")
    args = parser.parse_args()

    # Prefer cmarkgfm and fall back to commonmark.  Only a failed import should
    # trigger the fallback, so catch ImportError instead of using a bare except
    # (which would also swallow KeyboardInterrupt and SystemExit).
    try:
        import cmarkgfm
        md_parser = html_via_cmarkgfm
    except ImportError:
        try:
            import commonmark
            md_parser = html_via_commonmark
        except ImportError:
            # No die() helper exists in this script; raising SystemExit with a
            # string prints the message to stderr and exits with status 1.
            raise SystemExit("Failed to find cmarkgfm or commonmark for python3.")

    main()