summaryrefslogtreecommitdiff
path: root/ci/parse_relnotes.py
diff options
context:
space:
mode:
Diffstat (limited to 'ci/parse_relnotes.py')
-rw-r--r--ci/parse_relnotes.py115
1 files changed, 115 insertions, 0 deletions
diff --git a/ci/parse_relnotes.py b/ci/parse_relnotes.py
new file mode 100644
index 00000000..d19e6d60
--- /dev/null
+++ b/ci/parse_relnotes.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Parse CHANGES.md into a JSON structure.
+
+Run with two arguments: the .md file to parse, and the JSON file to write:
+
+ python parse_relnotes.py CHANGES.md relnotes.json
+
+Every section that has something that looks like a version number in it will
+be recorded as the release notes for that version.
+
+"""
+
+import json
+import re
+import sys
+
+
class TextChunkBuffer:
    """Accumulate pieces of text so they can be emitted later as one chunk."""

    def __init__(self):
        self.buffer = []

    def append(self, text):
        """Store `text` as the newest piece in the buffer."""
        self.buffer.append(text)

    def clear(self):
        """Discard everything buffered so far."""
        self.buffer = []

    def flush(self):
        """Yield a single ("text", text) tuple for the buffered pieces, then reset.

        The pieces are concatenated and surrounding whitespace is stripped.
        Nothing is yielded when the result is empty; the buffer is cleared
        either way once the generator has run to completion.
        """
        joined = "".join(self.buffer).strip()
        if joined != "":
            yield "text", joined
        self.clear()
+
+
def parse_md(lines):
    """Split markdown lines into a stream of (type, text) chunks.

    A line made of one or more "#", a space, and some text becomes an
    ("hN", text) chunk, where N is the number of "#" characters.  All other
    lines are collected and emitted together as one ("text", text) chunk
    just before the next header, or at the end of the input.
    """
    header_re = re.compile(r"^(#+) (.+)$")
    pending = TextChunkBuffer()

    for line in lines:
        found = header_re.search(line)
        if found is None:
            pending.append(line)
            continue
        # A header closes off whatever plain text came before it.
        yield from pending.flush()
        level = len(found.group(1))
        yield (f"h{level}", found.group(2))

    yield from pending.flush()
+
+
def sections(parsed_data):
    """Convert a stream of parsed tokens into sections with their text.

    `parsed_data` is an iterable of (type, text) tokens, where type is
    either "text" or a header type starting with "h" (such as "h2").

    Yields a stream of:
        ('h-level', 'header text', 'text')

    one per header, where 'text' is the text tokens that followed that
    header, joined with newlines.  Text appearing before the first header
    is discarded.  If the stream contains no header at all, nothing is
    yielded.

    Raises ValueError for a token type that is neither a header nor "text".

    """
    header = None
    text = []
    for ttype, ttext in parsed_data:
        if ttype.startswith("h"):
            if header is not None:
                yield (*header, "\n".join(text))
            # Start collecting afresh for the new header; this also drops
            # any text seen before the very first header.
            text = []
            header = (ttype, ttext)
        elif ttype == "text":
            text.append(ttext)
        else:
            raise ValueError(f"Don't know ttype {ttype!r}")
    # Emit the final section, but only if a header was ever seen: unpacking
    # a None header would raise TypeError on header-less input.
    if header is not None:
        yield (*header, "\n".join(text))
+
+
def refind(regex, text):
    """Return the first substring of `text` that matches `regex`, or None."""
    found = re.search(regex, text)
    return None if found is None else found.group()
+
def relnotes(mdlines):
    r"""Yield one dict per versioned section found in markdown lines.

    A section counts as a version when its header text contains something
    matching \d+\.\d followed by any run of non-space characters; that
    match is the version.  Sections without such a match are skipped.

    Each dict has these keys:
        "version": the matched version string.
        "text": the section's text.
        "prerelease": True if the version contains "a", "b", or "c".
        "when": the first digits-digits-digits run in the header, or None.

    """
    for _level, heading, body in sections(parse_md(mdlines)):
        version = refind(r"\d+\.\d[^ ]*", heading)
        if not version:
            continue
        yield {
            "version": version,
            "text": body,
            "prerelease": any(letter in version for letter in "abc"),
            "when": refind(r"\d+-\d+-\d+", heading),
        }
+
def parse(md_filename, json_filename):
    """Main function: parse markdown and write JSON.

    Reads `md_filename`, extracts the release notes with `relnotes`, and
    writes them to `json_filename` as a JSON list indented by four spaces.

    Both files are handled as UTF-8 so the result does not depend on the
    platform's default encoding.
    """
    with open(md_filename, encoding="utf-8") as mf:
        markdown = mf.read()
    # Parse before opening the output: opening with "w" truncates the file,
    # so a parse error would otherwise leave an empty JSON file behind.
    notes = list(relnotes(markdown.splitlines(True)))
    with open(json_filename, "w", encoding="utf-8") as jf:
        json.dump(notes, jf, indent=4)
+
if __name__ == "__main__":
    # Command-line use: every argument after the script name goes to parse().
    cli_args = sys.argv[1:]
    parse(*cli_args)    # pylint: disable=no-value-for-parameter