From 5bf5aa63d6b6742144071a2af896067c21b3752a Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 16 Jul 2021 15:21:48 +0200 Subject: Add a script for backing up Github issues. --- Tools/dump_github_issues.py | 142 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 Tools/dump_github_issues.py (limited to 'Tools') diff --git a/Tools/dump_github_issues.py b/Tools/dump_github_issues.py new file mode 100644 index 000000000..daec51c50 --- /dev/null +++ b/Tools/dump_github_issues.py @@ -0,0 +1,142 @@ +""" +Dump the GitHub issues of the current project to a file (.json.gz). + +Usage: python3 Tools/dump_github_issues.py +""" + +import configparser +import gzip +import json +import os.path + +from datetime import datetime +from urllib.request import urlopen + +GIT_CONFIG_FILE = ".git/config" + + +class RateLimitReached(Exception): + pass + + +def gen_urls(repo): + i = 0 + while True: + yield f"https://api.github.com/repos/{repo}/issues?state=all&per_page=100&page={i}" + i += 1 + + +def read_rate_limit(): + with urlopen("https://api.github.com/rate_limit") as p: + return json.load(p) + + +def parse_rate_limit(limits): + limits = limits['resources']['core'] + return limits['limit'], limits['remaining'], datetime.fromtimestamp(limits['reset']) + + +def load_url(url): + with urlopen(url) as p: + data = json.load(p) + if isinstance(data, dict) and 'rate limit' in data.get('message', ''): + raise RateLimitReached() + + assert isinstance(data, list), type(data) + return data or None # None indicates empty last page + + +def join_list_data(lists): + result = [] + for data in lists: + if not data: + break + result.extend(data) + return result + + +def output_filename(repo): + timestamp = datetime.now() + return f"github_issues_{repo.replace('/', '_')}_{timestamp.strftime('%Y%m%d_%H%M%S')}.json.gz" + + +def write_gzjson(file_name, data, indent=2): + with gzip.open(file_name, "wt", encoding='utf-8') as gz: + json.dump(data, gz, indent=indent) + + +def find_origin_url(git_config=GIT_CONFIG_FILE): + assert os.path.exists(git_config) + parser = configparser.ConfigParser() + parser.read(git_config) + return parser.get('remote "origin"', 'url') + + +def parse_repo_name(git_url): + if git_url.endswith('.git'): + git_url = git_url[:-4] + return '/'.join(git_url.split('/')[-2:]) + + +def dump_issues(repo): + """Main entry point.""" + print(f"Reading issues from repo '{repo}'") + urls = gen_urls(repo) + try: + paged_data = map(load_url, urls) + issues = join_list_data(paged_data) + except RateLimitReached: + limit, remaining, reset_time = parse_rate_limit(read_rate_limit()) + print(f"FAILURE: Rate limits ({limit}) reached, remaining: {remaining}, reset at {reset_time}") + return + + filename = output_filename(repo) + print(f"Writing {len(issues)} to {filename}") + write_gzjson(filename, issues) + + +### TESTS + +def test_join_list_data(): + assert join_list_data([]) == [] + assert join_list_data([[1,2]]) == [1,2] + assert join_list_data([[1,2], [3]]) == [1,2,3] + assert join_list_data([[0], [1,2], [3]]) == [0,1,2,3] + assert join_list_data([[0], [1,2], [[[]],[]]]) == [0,1,2,[[]],[]] + + +def test_output_filename(): + filename = output_filename("re/po") + import re + assert re.match(r"github_issues_re_po_[0-9]{8}_[0-9]{6}\.json", filename) + + +def test_find_origin_url(): + assert find_origin_url() + + +def test_parse_repo_name(): + assert parse_repo_name("https://github.com/cython/cython") == "cython/cython" + assert parse_repo_name("git+ssh://git@github.com/cython/cython.git") == "cython/cython" + assert parse_repo_name("git+ssh://git@github.com/fork/cython.git") == "fork/cython" + + +def test_write_gzjson(): + import tempfile + with tempfile.NamedTemporaryFile() as tmp: + write_gzjson(tmp.name, [{}]) + + # test JSON format + with gzip.open(tmp.name) as f: + assert json.load(f) == [{}] + + # test indentation + with gzip.open(tmp.name) as f: + assert f.read() == b'[\n {}\n]' + + +### MAIN + +if __name__ == '__main__': + repo_name = parse_repo_name(find_origin_url()) + dump_issues(repo_name) -- cgit v1.2.1