summaryrefslogtreecommitdiff
path: root/script/check_changelog.py
blob: 77f2d61b4115b3056cfca8a2374083364bcceec6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
# Copyright (c) https://github.com/PyCQA/pylint/blob/main/CONTRIBUTORS.txt

"""Small script to check the changelog. Used by 'changelog.yml' and pre-commit.

If no issue number is provided we only check that proper formatting is respected."""

from __future__ import annotations

import argparse
import re
import sys
from collections.abc import Iterator
from pathlib import Path
from re import Pattern

# Keywords that may introduce an issue reference in a changelog entry.
VALID_ISSUES_KEYWORDS = ["Refs", "Closes", "Follow-up in", "Fixes part of"]
# An issue reference: '#' followed by one to five digits.
ISSUE_NUMBER_PATTERN = r"#\d{1,5}"
# A literal '*' followed (lazily, across newlines) by anything up to the next
# issue reference. Used to count entries that mention an issue at all.
VALID_ISSUE_NUMBER_PATTERN = r"\*[\S\s]*?" + ISSUE_NUMBER_PATTERN
# Regex alternation of the accepted keywords.
ISSUES_KEYWORDS = "|".join(VALID_ISSUES_KEYWORDS)
# A well-formed entry up to (but excluding) the issue number: a '*' bullet, the
# description, an empty line, two whitespace characters, one of the keywords,
# a space, and an optional 'PyCQA/astroid' prefix.
# Groups: 1 = bullet through keyword, 2 = keyword, 3 = optional repository prefix.
# NOTE(review): '[\S[\n ]+?]*' parses as the character class '[\S[\n ]' (lazy,
# one or more) followed by zero or more literal ']' characters; this may not be
# what was intended -- confirm before touching it.
PREFIX_CHANGELOG_PATTERN = (
    rf"(\*\s[\S[\n ]+?]*\n\n\s\s({ISSUES_KEYWORDS})) (PyCQA/astroid)?"
)
# A complete well-formed entry: the prefix immediately followed by the issue number.
VALID_CHANGELOG_PATTERN = PREFIX_CHANGELOG_PATTERN + ISSUE_NUMBER_PATTERN

# Compiled once at import time; reused for every file that gets checked.
ISSUE_NUMBER_COMPILED_PATTERN = re.compile(ISSUE_NUMBER_PATTERN)
VALID_CHANGELOG_COMPILED_PATTERN: Pattern[str] = re.compile(VALID_CHANGELOG_PATTERN)
VALID_ISSUE_NUMBER_COMPILED_PATTERN: Pattern[str] = re.compile(
    VALID_ISSUE_NUMBER_PATTERN
)

# The 'doc' directory that sits next to this script's directory, as an absolute path.
DOC_PATH = (Path(__file__).parent / "../doc/").resolve()
# Directory that is searched recursively for what's-new files.
PATH_TO_WHATSNEW = DOC_PATH / "whatsnew"
# Versions whose what's-new files are skipped entirely.
UNCHECKED_VERSION = [
    # Not checking versions prior to 1.0.0 because the issues referenced are a mix
    # of Logilab internal issues and Bitbucket issues. It's hard to tell which is
    # which: Logilab's are inaccessible and Bitbucket's are often dead links anyway.
    # Not very useful generally, unless you're an open source historian.
    "0.x",
    # Too many Bitbucket issues in these ones:
    "1.0",
    "1.1",
    "1.2",
]

# File names under the what's-new directory that are never checked.
NO_CHECK_REQUIRED_FILES = {
    "index.rst",
    "full_changelog_explanation.rst",
    "summary_explanation.rst",
}


def sorted_whatsnew(verbose: bool) -> Iterator[Path]:
    """Yield the what's-new files in numerical version order ('9' before '10').

    Every entry found recursively under ``PATH_TO_WHATSNEW`` is considered.
    Directories, files listed in ``NO_CHECK_REQUIRED_FILES`` and files whose
    version is in ``UNCHECKED_VERSION`` are skipped. The version of a file is
    its stem, except for 'summary' and 'full' files, which take the name of
    their parent directory.

    All files that share a version (for instance 'summary.rst' and 'full.rst'
    in the same directory) are yielded, ordered by path within that version.

    :param verbose: When truthy, print what is skipped and what will be checked.
    :raises ValueError: If a remaining file has a version component that is
        not an integer.
    """
    # A list of (version, path) pairs rather than a dict keyed by version:
    # several files can legitimately map to the same version, and a dict would
    # silently keep only the last one globbed.
    versioned_files: list[tuple[tuple[int, ...], Path]] = []
    for file in PATH_TO_WHATSNEW.glob("**/*"):
        relpath_file = file.relative_to(DOC_PATH)
        if file.is_dir():
            if verbose:
                print(f"I don't care about '{relpath_file}', it's a directory : 🤖🤷")
            continue
        if file.name in NO_CHECK_REQUIRED_FILES:
            if verbose:
                print(
                    f"I don't care about '{relpath_file}' it's in 'NO_CHECK_REQUIRED_FILES' : 🤖🤷"
                )
            continue
        version = (
            file.parents[0].name if file.stem in {"summary", "full"} else file.stem
        )
        if version in UNCHECKED_VERSION:
            if verbose:
                print(
                    f"I don't care about '{relpath_file}' {version} is in UNCHECKED_VERSION : 🤖🤷"
                )
            continue
        if verbose:
            print(f"I'm going to check '{relpath_file}' 🤖")
        # Compare versions as tuples of ints so that '2.9' sorts before '2.10'.
        num = tuple(int(x) for x in version.split("."))
        versioned_files.append((num, file))
    # Sorting the pairs orders by version first and by path second, which keeps
    # the output deterministic regardless of the order glob() returned.
    for _, file in sorted(versioned_files):
        yield file


def main(argv: list[str] | None = None) -> int:
    """Check every what's-new file and return a process exit code.

    :param argv: Command-line arguments without the program name. ``None``
        (the default) means "use ``sys.argv[1:]``"; an explicit list, even an
        empty one, is parsed exactly as given.
    :returns: 0 when every checked file is valid, 1 otherwise.
    """
    # Compare against None instead of relying on truthiness: 'argv or ...'
    # would replace an explicit empty list with the host process's arguments.
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description=__doc__)
    # NOTE(review): '--issue-number' is accepted but its value is not read
    # anywhere in this function -- confirm whether that is intended.
    parser.add_argument(
        "--issue-number",
        type=int,
        default=0,
        help="The issue we expect to find in the changelog.",
    )
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args(argv)
    # A count of '-v' flags; only its truthiness is used by the callees.
    verbose = args.verbose
    is_valid = True
    for file in sorted_whatsnew(verbose):
        # No early exit: keep going so that every invalid file gets reported.
        if not check_file(file, verbose):
            is_valid = False
    return 0 if is_valid else 1


def check_file(file: Path, verbose: bool) -> bool:
    """Tell whether every issue-referencing entry of *file* is well formed.

    The file is read as UTF-8. The number of entries matching the full
    changelog format is compared with the number of entries that merely
    contain an issue number; when the two counts differ, the verdict (and the
    reporting) is delegated to ``create_detailed_fail_message``.
    """
    with open(file, encoding="utf8") as stream:
        text = stream.read()
    well_formed = VALID_CHANGELOG_COMPILED_PATTERN.findall(text)
    with_issue_number = VALID_ISSUE_NUMBER_COMPILED_PATTERN.findall(text)
    if len(well_formed) == len(with_issue_number):
        if verbose:
            relative = file.relative_to(DOC_PATH)
            print(f"Checked '{relative}' : LGTM 🤖👍")
        return True
    # Counts differ: let the detailed report decide and explain.
    return create_detailed_fail_message(file, with_issue_number, well_formed)


def create_detailed_fail_message(
    file_name: Path,
    contain_issue_number_descriptions: list,
    valid_full_descriptions: list,
) -> bool:
    """Report each issue-bearing entry that matches no well-formed entry.

    An entry is accepted when the first group of at least one item of
    *valid_full_descriptions* is a substring of it. For every other entry a
    line naming the file and the first issue number found in the entry is
    printed.

    Returns True when nothing had to be reported, False otherwise.
    """
    offending = [
        description
        for description in contain_issue_number_descriptions
        if all(valid[0] not in description for valid in valid_full_descriptions)
    ]
    for description in offending:
        # Only the first issue number of the entry is used in the message.
        issue_number = ISSUE_NUMBER_COMPILED_PATTERN.findall(description)[0]
        print(
            f"{file_name}: {issue_number}'s description is not on one line, or "
            "does not respect the standard format 🤖👎"
        )
    return not offending


# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())