New option: logging-format-style for logging checker (#2521)

logging-format-style accepts one of '%' or '{', (defaults to '%'). When '{' is selected, logging checker assumes str.format() style format strings for calls to the logging. pylint was unable to count the required number of args for the format string when the format string was using the `{` format. The new feature indirectly fixes that by allowing the proper interpretation of that format string.
author: Alan Chan <achan961117@gmail.com> 2018-10-04 15:28:36 +0800
committer: Claudiu Popa <pcmanticore@gmail.com> 2018-10-04 09:28:36 +0200
commit: 0dd573faae1ad339c07f789270002f8faf2a3691 (patch)
tree: 5b139a624a01a1e86a2cbe0eb920d80ca167bc41
parent: 540e26db2e05c4f4f555db54d4c046ceff0e7763 (diff)
download: pylint-git-0dd573faae1ad339c07f789270002f8faf2a3691.tar.gz
8 files changed, 146 insertions, 101 deletions
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 1d74db964..eea7b2c6d 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -236,6 +236,8 @@ contributors:
 
 * Tomer Chachamu, Richard Goodman: simplifiable-if-expression
 
+* Alan Chan: contributor
+
 * Benjamin Drung: contributing Debian Developer
 
 * Scott Worley: contributor
diff --git a/ChangeLog b/ChangeLog
index 62a75d27d..396030a8f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -160,6 +160,8 @@ Release date: TBA
 
      Close #2430
 
+   * Add new option to logging checker, ``logging_format_style``
+
    * Fix --ignore-imports to understand multi-line imports
 
      Close #1422
diff --git a/doc/whatsnew/2.2.rst b/doc/whatsnew/2.2.rst
index 09176daad..e001dd3d4 100644
--- a/doc/whatsnew/2.2.rst
+++ b/doc/whatsnew/2.2.rst
@@ -17,6 +17,9 @@ New checkers
 * ``duplicate-string-formatting-argument`` was added for detecting duplicate string
   formatting arguments that should be passed instead as named arguments.
 
+* ``logging-format-style`` is a new option for the logging checker for usage of
+  str.format() style format strings in calls to loggers.
+
 Other Changes
 =============
 
diff --git a/pylint/checkers/logging.py b/pylint/checkers/logging.py
index 92a4a0428..139faf9f4 100644
--- a/pylint/checkers/logging.py
+++ b/pylint/checkers/logging.py
@@ -136,6 +136,16 @@ class LoggingChecker(checkers.BaseChecker):
                 "arguments are in logging function parameter format.",
             },
         ),
+        (
+            "logging-format-style",
+            {
+                "default": "%",
+                "type": "choice",
+                "metavar": "<% or {>",
+                "choices": ["%", "{"],
+                "help": "Format style used to check logging format string",
+            },
+        ),
     )
 
     def visit_module(self, node):  # pylint: disable=unused-argument
@@ -146,6 +156,7 @@ class LoggingChecker(checkers.BaseChecker):
         self._logging_names = set()
         logging_mods = self.config.logging_modules
 
+        self._format_style = self.config.logging_format_style
         self._logging_modules = set(logging_mods)
         self._from_imports = {}
         for logging_mod in logging_mods:
@@ -284,13 +295,21 @@ class LoggingChecker(checkers.BaseChecker):
             required_num_args = 0
         else:
             try:
-                keyword_args, required_num_args, _, _ = utils.parse_format_string(
-                    format_string
-                )
-                if keyword_args:
-                    # Keyword checking on logging strings is complicated by
-                    # special keywords - out of scope.
-                    return
+                if self._format_style == "%":
+                    keyword_args, required_num_args, _, _ = utils.parse_format_string(
+                        format_string
+                    )
+                    if keyword_args:
+                        # Keyword checking on logging strings is complicated by
+                        # special keywords - out of scope.
+                        return
+                elif self._format_style == "{":
+                    keys, num_args, manual_pos_arg = utils.parse_format_method_string(
+                        format_string
+                    )
+
+                    kargs = len(set(k for k, l in keys if not isinstance(k, int)))
+                    required_num_args = kargs + num_args + manual_pos_arg
             except utils.UnsupportedFormatCharacter as ex:
                 char = format_string[ex.index]
                 self.add_message(
diff --git a/pylint/checkers/strings.py b/pylint/checkers/strings.py
index f14673cab..0f7123a71 100644
--- a/pylint/checkers/strings.py
+++ b/pylint/checkers/strings.py
@@ -25,7 +25,6 @@
 import builtins
 import sys
 import tokenize
-import string
 import numbers
 from collections import Counter
 
@@ -171,98 +170,6 @@ BUILTINS_STR = builtins.__name__ + ".str"
 BUILTINS_FLOAT = builtins.__name__ + ".float"
 BUILTINS_INT = builtins.__name__ + ".int"
 
-if _PY3K:
-    import _string  # pylint: disable=wrong-import-position, wrong-import-order
-
-    def split_format_field_names(format_string):
-        try:
-            return _string.formatter_field_name_split(format_string)
-        except ValueError:
-            raise utils.IncompleteFormatString()
-
-
-else:
-
-    def _field_iterator_convertor(iterator):
-        for is_attr, key in iterator:
-            if isinstance(key, numbers.Number):
-                yield is_attr, int(key)
-            else:
-                yield is_attr, key
-
-    def split_format_field_names(format_string):
-        try:
-            keyname, fielditerator = format_string._formatter_field_name_split()
-        except ValueError:
-            raise utils.IncompleteFormatString
-        # it will return longs, instead of ints, which will complicate
-        # the output
-        return keyname, _field_iterator_convertor(fielditerator)
-
-
-def collect_string_fields(format_string):
-    """ Given a format string, return an iterator
-    of all the valid format fields. It handles nested fields
-    as well.
-    """
-
-    formatter = string.Formatter()
-    try:
-        parseiterator = formatter.parse(format_string)
-        for result in parseiterator:
-            if all(item is None for item in result[1:]):
-                # not a replacement format
-                continue
-            name = result[1]
-            nested = result[2]
-            yield name
-            if nested:
-                for field in collect_string_fields(nested):
-                    yield field
-    except ValueError as exc:
-        # Probably the format string is invalid.
-        if exc.args[0].startswith("cannot switch from manual"):
-            # On Jython, parsing a string with both manual
-            # and automatic positions will fail with a ValueError,
-            # while on CPython it will simply return the fields,
-            # the validation being done in the interpreter (?).
-            # We're just returning two mixed fields in order
-            # to trigger the format-combined-specification check.
-            yield ""
-            yield "1"
-            return
-        raise utils.IncompleteFormatString(format_string)
-
-
-def parse_format_method_string(format_string):
-    """
-    Parses a PEP 3101 format string, returning a tuple of
-    (keys, num_args, manual_pos_arg),
-    where keys is the set of mapping keys in the format string, num_args
-    is the number of arguments required by the format string and
-    manual_pos_arg is the number of arguments passed with the position.
-    """
-    keys = []
-    num_args = 0
-    manual_pos_arg = set()
-    for name in collect_string_fields(format_string):
-        if name and str(name).isdigit():
-            manual_pos_arg.add(str(name))
-        elif name:
-            keyname, fielditerator = split_format_field_names(name)
-            if isinstance(keyname, numbers.Number):
-                # In Python 2 it will return long which will lead
-                # to different output between 2 and 3
-                manual_pos_arg.add(str(keyname))
-                keyname = int(keyname)
-            try:
-                keys.append((keyname, list(fielditerator)))
-            except ValueError:
-                raise utils.IncompleteFormatString()
-        else:
-            num_args += 1
-    return keys, num_args, len(manual_pos_arg)
-
 
 def get_access_path(key, parts):
     """ Given a list of format specifiers, returns
@@ -487,7 +394,9 @@ class StringFormatChecker(BaseChecker):
             return
 
         try:
-            fields, num_args, manual_pos = parse_format_method_string(strnode.value)
+            fields, num_args, manual_pos = utils.parse_format_method_string(
+                strnode.value
+            )
         except utils.IncompleteFormatString:
             self.add_message("bad-format-string", node=node)
             return
diff --git a/pylint/checkers/utils.py b/pylint/checkers/utils.py
index f4d10006b..98a892f90 100644
--- a/pylint/checkers/utils.py
+++ b/pylint/checkers/utils.py
@@ -33,6 +33,7 @@
 import builtins
 from functools import lru_cache, partial, singledispatch
 import itertools
+import numbers
 import re
 import sys
 import string
@@ -48,6 +49,7 @@ from typing import (
     List,
     Type,
 )
+import _string  # pylint: disable=wrong-import-position, wrong-import-order
 
 import astroid
 from astroid import bases as _bases
@@ -535,6 +537,78 @@ def parse_format_string(
     return keys, num_args, key_types, pos_types
 
 
+def split_format_field_names(format_string) -> Tuple[str, Iterable[Tuple[bool, str]]]:
+    try:
+        return _string.formatter_field_name_split(format_string)
+    except ValueError:
+        raise IncompleteFormatString()
+
+
+def collect_string_fields(format_string) -> Iterable[Optional[str]]:
+    """ Given a format string, return an iterator
+    of all the valid format fields. It handles nested fields
+    as well.
+    """
+    formatter = string.Formatter()
+    try:
+        parseiterator = formatter.parse(format_string)
+        for result in parseiterator:
+            if all(item is None for item in result[1:]):
+                # not a replacement format
+                continue
+            name = result[1]
+            nested = result[2]
+            yield name
+            if nested:
+                for field in collect_string_fields(nested):
+                    yield field
+    except ValueError as exc:
+        # Probably the format string is invalid.
+        if exc.args[0].startswith("cannot switch from manual"):
+            # On Jython, parsing a string with both manual
+            # and automatic positions will fail with a ValueError,
+            # while on CPython it will simply return the fields,
+            # the validation being done in the interpreter (?).
+            # We're just returning two mixed fields in order
+            # to trigger the format-combined-specification check.
+            yield ""
+            yield "1"
+            return
+        raise IncompleteFormatString(format_string)
+
+
+def parse_format_method_string(
+    format_string: str
+) -> Tuple[List[Tuple[str, List[Tuple[bool, str]]]], int, int]:
+    """
+    Parses a PEP 3101 format string, returning a tuple of
+    (keys, num_args, manual_pos_arg),
+    where keys is the set of mapping keys in the format string, num_args
+    is the number of arguments required by the format string and
+    manual_pos_arg is the number of arguments passed with the position.
+    """
+    keys = []
+    num_args = 0
+    manual_pos_arg = set()
+    for name in collect_string_fields(format_string):
+        if name and str(name).isdigit():
+            manual_pos_arg.add(str(name))
+        elif name:
+            keyname, fielditerator = split_format_field_names(name)
+            if isinstance(keyname, numbers.Number):
+                # In Python 2 it will return long which will lead
+                # to different output between 2 and 3
+                manual_pos_arg.add(str(keyname))
+                keyname = int(keyname)
+            try:
+                keys.append((keyname, list(fielditerator)))
+            except ValueError:
+                raise IncompleteFormatString()
+        else:
+            num_args += 1
+    return keys, num_args, len(manual_pos_arg)
+
+
 def is_attr_protected(attrname: str) -> bool:
     """return True if attribute name is protected (start with _ and some other
     details), False otherwise.
diff --git a/pylint/test/unittest_checker_logging.py b/pylint/test/unittest_checker_logging.py
index ef2b40b0f..62891af0b 100644
--- a/pylint/test/unittest_checker_logging.py
+++ b/pylint/test/unittest_checker_logging.py
@@ -62,3 +62,16 @@ class TestLoggingModuleDetection(CheckerTestCase):
         self.checker.visit_import(stmts[0])
         with self.assertAddsMessages(Message("logging-not-lazy", node=stmts[1])):
             self.checker.visit_call(stmts[1])
+
+    @set_config(logging_format_style="{")
+    def test_brace_format_style(self):
+        stmts = astroid.extract_node(
+            """
+        import logging #@
+        logging.error('{}', 1) #@
+        """
+        )
+        self.checker.visit_module(None)
+        self.checker.visit_import(stmts[0])
+        with self.assertNoMessages():
+            self.checker.visit_call(stmts[1])
diff --git a/pylint/test/unittest_checkers_utils.py b/pylint/test/unittest_checkers_utils.py
index 1fb4ce368..3471912c3 100644
--- a/pylint/test/unittest_checkers_utils.py
+++ b/pylint/test/unittest_checkers_utils.py
@@ -147,3 +147,26 @@ def test_is_subclass_of_not_classdefs():
     assert not utils.is_subclass_of(None, node)
     assert not utils.is_subclass_of(node, None)
     assert not utils.is_subclass_of(None, None)
+
+
+def test_parse_format_method_string():
+    samples = [
+        ("{}", 1),
+        ("{}:{}", 2),
+        ("{field}", 1),
+        ("{:5}", 1),
+        ("{:10}", 1),
+        ("{field:10}", 1),
+        ("{field:10}{{}}", 1),
+        ("{:5}{!r:10}", 2),
+        ("{:5}{}{{}}{}", 3),
+        ("{0}{1}{0}", 2),
+        ("Coordinates: {latitude}, {longitude}", 2),
+        ("X: {0[0]};  Y: {0[1]}", 1),
+        ("{:*^30}", 1),
+        ("{!r:}", 1),
+    ]
+    for fmt, count in samples:
+        keys, num_args, pos_args = utils.parse_format_method_string(fmt)
+        keyword_args = len(set(k for k, l in keys if not isinstance(k, int)))
+        assert keyword_args + num_args + pos_args == count
author	Alan Chan <achan961117@gmail.com>	2018-10-04 15:28:36 +0800
committer	Claudiu Popa <pcmanticore@gmail.com>	2018-10-04 09:28:36 +0200
commit	0dd573faae1ad339c07f789270002f8faf2a3691 (patch)
tree	5b139a624a01a1e86a2cbe0eb920d80ca167bc41
parent	540e26db2e05c4f4f555db54d4c046ceff0e7763 (diff)
download	pylint-git-0dd573faae1ad339c07f789270002f8faf2a3691.tar.gz