# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
# Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt

from __future__ import annotations

import re
from collections.abc import Generator
from typing import NamedTuple

# Allow stopping after the first semicolon/hash encountered,
# so that an option can be continued with the reasons
# why it is active or disabled.
#
# NOTE: the pattern is compiled with re.VERBOSE, so the line breaks below are
# significant: each inline "# ..." comment runs until the end of its line.
OPTION_RGX = r"""
    (?:^\s*\#.*|\s*|               # Comment line, or whitespaces,
       \s*\#.*(?=\#.*?\bpylint:))  # or a beginning of an inline comment
                                   # followed by "pylint:" pragma
    (\#                            # Beginning of comment
    .*?                            # Anything (as little as possible)
    \bpylint:                      # pylint word and column
    \s*                            # Any number of whitespaces
    ([^;#\n]+))                    # Anything except semicolon or hash or
                                   # newline (it is the second matched group)
                                   # and end of the first matched group
    [;#]{0,1}                      # From 0 to 1 repetition of semicolon or hash
"""
OPTION_PO = re.compile(OPTION_RGX, re.VERBOSE)


class PragmaRepresenter(NamedTuple):
    """One parsed pragma: a keyword and the message identifiers attached to it."""

    # Keyword found in the pragma (one of ATOMIC_KEYWORDS or MESSAGE_KEYWORDS).
    action: str
    # Message tokens collected after the keyword; may be empty.
    messages: list[str]


# Keywords that emit_pragma_representer() accepts without any message.
ATOMIC_KEYWORDS = frozenset(("disable-all", "skip-file"))
# Keywords that must be followed by "=" and at least one message.
MESSAGE_KEYWORDS = frozenset(
    ("disable-next", "disable-msg", "enable-msg", "disable", "enable")
)
# sorted is necessary because sets are unordered collections and ALL_KEYWORDS
# string should not vary between executions
# reverse is necessary in order to have the longest keywords first, so that, for example,
# 'disable' string should not be matched instead of 'disable-all'
ALL_KEYWORDS = "|".join(
    sorted(ATOMIC_KEYWORDS | MESSAGE_KEYWORDS, key=len, reverse=True)
)


# (token name, regex) pairs. Order matters: the alternatives are tried left to
# right when joined into TOK_REGEX, so KEYWORD takes precedence over
# MESSAGE_STRING.
TOKEN_SPECIFICATION = [
    ("KEYWORD", rf"\b({ALL_KEYWORDS:s})\b"),
    ("MESSAGE_STRING", r"[0-9A-Za-z\-\_]{2,}"),  # Identifiers
    ("ASSIGN", r"="),  # Assignment operator
    ("MESSAGE_NUMBER", r"[CREIWF]{1}\d*"),
]

# One alternation of named groups; the name of the group that matched is read
# back through ``Match.lastgroup`` in parse_pragma().
TOK_REGEX = "|".join(
    f"(?P<{token_name:s}>{token_rgx:s})"
    for token_name, token_rgx in TOKEN_SPECIFICATION
)


def emit_pragma_representer(action: str, messages: list[str]) -> PragmaRepresenter:
    """Build a PragmaRepresenter, rejecting message keywords without messages.

    :param action: keyword found in the pragma
    :param messages: message tokens collected for that keyword
    :raises InvalidPragmaError: if ``action`` is in MESSAGE_KEYWORDS and
        ``messages`` is empty
    """
    if not messages and action in MESSAGE_KEYWORDS:
        raise InvalidPragmaError(
            "The keyword is not followed by message identifier", action
        )
    return PragmaRepresenter(action, messages)


class PragmaParserError(Exception):
    """A class for exceptions thrown by pragma_parser module."""

    def __init__(self, message: str, token: str) -> None:
        """Store the explanation and the offending token.

        :param message: explain the reason why the exception has been thrown
        :param token: token concerned by the exception.
        """
        self.message = message
        self.token = token
        super().__init__(self.message)


class UnRecognizedOptionError(PragmaParserError):
    """Thrown in case of a valid but unrecognized option."""


class InvalidPragmaError(PragmaParserError):
    """Thrown in case the pragma is invalid."""


def parse_pragma(pylint_pragma: str) -> Generator[PragmaRepresenter, None, None]:
    """Tokenize a pragma string and yield one PragmaRepresenter per keyword.

    This is a generator: tokens are consumed and errors are raised lazily,
    while the result is being iterated.

    :param pylint_pragma: the text to tokenize with TOK_REGEX
    :raises UnRecognizedOptionError: if "=" follows a keyword that does not
        take messages or a token that is not a keyword, or if no keyword was
        found at all
    :raises InvalidPragmaError: if "=" appears before any token, if a message
        keyword is not immediately followed by "=", or if a message keyword
        has no message
    """
    action: str | None = None
    messages: list[str] = []
    # True right after a message keyword, until its "=" has been consumed.
    assignment_required = False
    previous_token = ""

    for mo in re.finditer(TOK_REGEX, pylint_pragma):
        kind = mo.lastgroup
        value = mo.group()

        if kind == "ASSIGN":
            if not assignment_required:
                if action:
                    # A keyword has been found previously but doesn't support assignment
                    raise UnRecognizedOptionError(
                        "The keyword doesn't support assignment", action
                    )
                if previous_token:
                    # Something found previously but not a known keyword
                    raise UnRecognizedOptionError(
                        "The keyword is unknown", previous_token
                    )
                # Nothing at all detected before this assignment
                raise InvalidPragmaError("Missing keyword before assignment", "")
            assignment_required = False
        elif assignment_required:
            raise InvalidPragmaError(
                "The = sign is missing after the keyword", action or ""
            )
        elif kind == "KEYWORD":
            # A new keyword closes the previous one: yield it first.
            if action:
                yield emit_pragma_representer(action, messages)
            action = value
            messages = []
            assignment_required = action in MESSAGE_KEYWORDS
        elif kind in {"MESSAGE_STRING", "MESSAGE_NUMBER"}:
            messages.append(value)
            assignment_required = False
        else:
            raise RuntimeError("Token not recognized")

        previous_token = value

    if action:
        yield emit_pragma_representer(action, messages)
    else:
        raise UnRecognizedOptionError("The keyword is unknown", previous_token)