Add support for disabling line-too-long for multiline strings

This commit adds support for disabling `line-too-long` messages for multiline strings such as docstrings. When a pylint disable pragma is present at the end of the docstring, it is taken into account for the entire docstring. Close #2957
author: hippo91 <guillaume.peillex@gmail.com> 2019-11-19 09:16:54 +0100
committer: Claudiu Popa <pcmanticore@gmail.com> 2019-11-19 09:16:54 +0100
commit: 9bdae8b82fcc5b0592135cbf6bead7df360a6672 (patch)
tree: 823e86f1aa28d792d03bd963b52d1b9872d4ed9d /pylint/utils/pragma_parser.py
parent: 0ed3782d7a933b822d2ecf189fc24da0aaefbd6e (diff)
download: pylint-git-9bdae8b82fcc5b0592135cbf6bead7df360a6672.tar.gz
1 files changed, 134 insertions, 0 deletions
diff --git a/pylint/utils/pragma_parser.py b/pylint/utils/pragma_parser.py
new file mode 100644
index 000000000..6ceefe031
--- /dev/null
+++ b/pylint/utils/pragma_parser.py
@@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+
+# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
+
+import re
+from collections import namedtuple
+from typing import Generator, List
+
+# Allow stopping after the first semicolon/hash encountered,
+# so that an option can be continued with the reasons
+# why it is active or disabled.
+OPTION_RGX = r"""
+    \s*                # Any number of whithespace
+    \#?                # One or zero hash
+    .*                 # Anything (as much as possible)
+    (\s*               # Beginning of first matched group and any number of whitespaces
+    \#                 # Beginning of comment
+    .*?                # Anything (as little as possible)
+    \bpylint:          # pylint word and column
+    \s*                # Any number of whitespaces
+    ([^;#\n]+))        # Anything except semicolon or hash or newline (it is the second matched group) 
+                       # and end of the first matched group
+    [;#]{0,1}"""  # From 0 to 1 repetition of semicolon or hash
+OPTION_PO = re.compile(OPTION_RGX, re.VERBOSE)
+
+
+PragmaRepresenter = namedtuple("PragmaRepresenter", "action messages")
+
+
+ATOMIC_KEYWORDS = frozenset(("disable-all", "skip-file"))
+MESSAGE_KEYWORDS = frozenset(("disable-msg", "enable-msg", "disable", "enable"))
+# sorted is necessary because sets are unordered collections and the ALL_KEYWORDS
+# string should not vary between executions.
+# reverse is necessary in order to have the longest keywords first, so that, for example,
+# the 'disable' string is not matched instead of 'disable-all'.
+ALL_KEYWORDS = "|".join(
+    sorted(ATOMIC_KEYWORDS | MESSAGE_KEYWORDS, key=len, reverse=True)
+)
+
+
+TOKEN_SPECIFICATION = [
+    ("KEYWORD", r"\b({:s})\b".format(ALL_KEYWORDS)),
+    ("MESSAGE_STRING", r"[A-Za-z\-]{2,}"),  #  Identifiers
+    ("ASSIGN", r"="),  #  Assignment operator
+    ("MESSAGE_NUMBER", r"[CREIWF]{1}\d*"),
+]
+
+TOK_REGEX = "|".join(
+    "(?P<{:s}>{:s})".format(token_name, token_rgx)
+    for token_name, token_rgx in TOKEN_SPECIFICATION
+)
+
+
+def emit_pragma_representer(action, messages):
+    if not messages and action in MESSAGE_KEYWORDS:
+        raise InvalidPragmaError(
+            "The keyword is not followed by message identifier", action
+        )
+    return PragmaRepresenter(action, messages)
+
+
+class PragmaParserError(Exception):
+    """
+    A class for exceptions thrown by pragma_parser module
+    """
+
+    def __init__(self, message, token):
+        """
+        :args message: explain the reason why the exception has been thrown
+        :args token: token concerned by the exception
+        """
+        self.message = message
+        self.token = token
+        super(PragmaParserError, self).__init__(self.message)
+
+
+class UnRecognizedOptionError(PragmaParserError):
+    """
+    Thrown in case of a valid but unrecognized option
+    """
+
+
+class InvalidPragmaError(PragmaParserError):
+    """
+    Thrown in case the pragma is invalid
+    """
+
+
+def parse_pragma(pylint_pragma: str) -> Generator[PragmaRepresenter, None, None]:
+    action = None
+    messages = []  # type: List[str]
+    assignment_required = False
+    previous_token = ""
+
+    for mo in re.finditer(TOK_REGEX, pylint_pragma):
+        kind = mo.lastgroup
+        value = mo.group()
+
+        if kind == "ASSIGN":
+            if not assignment_required:
+                if action:
+                    # A keyword has been found previously but doesn't support assignment
+                    raise UnRecognizedOptionError(
+                        "The keyword doesn't support assignment", action
+                    )
+                if previous_token:
+                    #  Something found previously but not a known keyword
+                    raise UnRecognizedOptionError(
+                        "The keyword is unknown", previous_token
+                    )
+                # Nothing at all detected before this assignment
+                raise InvalidPragmaError("Missing keyword before assignment", "")
+            assignment_required = False
+        elif assignment_required:
+            raise InvalidPragmaError("The = sign is missing after the keyword", action)
+        elif kind == "KEYWORD":
+            if action:
+                yield emit_pragma_representer(action, messages)
+            action = value
+            messages = list()
+            assignment_required = action in MESSAGE_KEYWORDS
+        elif kind in ("MESSAGE_STRING", "MESSAGE_NUMBER"):
+            messages.append(value)
+            assignment_required = False
+        else:
+            raise RuntimeError("Token not recognized")
+
+        previous_token = value
+
+    if action:
+        yield emit_pragma_representer(action, messages)
+    else:
+        raise UnRecognizedOptionError("The keyword is unknown", previous_token)
author	hippo91 <guillaume.peillex@gmail.com>	2019-11-19 09:16:54 +0100
committer	Claudiu Popa <pcmanticore@gmail.com>	2019-11-19 09:16:54 +0100
commit	9bdae8b82fcc5b0592135cbf6bead7df360a6672 (patch)
tree	823e86f1aa28d792d03bd963b52d1b9872d4ed9d /pylint/utils/pragma_parser.py
parent	0ed3782d7a933b822d2ecf189fc24da0aaefbd6e (diff)
download	pylint-git-9bdae8b82fcc5b0592135cbf6bead7df360a6672.tar.gz