From ac0aeba0eddb0b8e2285f5ac568e4621426b3aa7 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 31 Dec 2018 22:29:32 -0800 Subject: Parse PEP 484 type comments --- .appveyor.yml | 4 +- .travis.yml | 8 +- pyflakes/api.py | 10 +- pyflakes/checker.py | 156 +++++++++++++++++++++++----- pyflakes/messages.py | 8 ++ pyflakes/test/harness.py | 8 +- pyflakes/test/test_checker.py | 186 ++++++++++++++++++++++++++++++++++ pyflakes/test/test_other.py | 66 ++++++++++++ pyflakes/test/test_undefined_names.py | 8 +- tox.ini | 4 +- 10 files changed, 406 insertions(+), 52 deletions(-) create mode 100644 pyflakes/test/test_checker.py diff --git a/.appveyor.yml b/.appveyor.yml index 5739e3b..f7d7aa2 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -44,5 +44,5 @@ build: off test_script: - python -m tox -# - C:\pypy3-v5.10.0-win32\pypy3 setup.py test -q - - C:\pypy-2.6.1-win32\pypy setup.py test -q +# - C:\pypy3-v5.10.0-win32\pypy3 -m unittest discover pyflakes + - C:\pypy-2.6.1-win32\pypy -m unittest discover pyflakes diff --git a/.travis.yml b/.travis.yml index 5d92e4f..20e195d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,9 +13,5 @@ matrix: dist: xenial - python: nightly dist: xenial -install: - - pip install --upgrade . - - pip list -script: - - python setup.py test -q - - if [ "$TRAVIS_PYTHON_VERSION" != "nightly" ]; then pip install flake8==3.6.0 && flake8 --version && flake8 pyflakes setup.py; fi +install: pip install tox +script: tox -e py diff --git a/pyflakes/api.py b/pyflakes/api.py index 3ddb386..72d6b6c 100644 --- a/pyflakes/api.py +++ b/pyflakes/api.py @@ -3,18 +3,17 @@ API for the command-line I{pyflakes} tool. """ from __future__ import with_statement -import sys +import ast import os import platform import re -import _ast +import sys from pyflakes import checker, __version__ from pyflakes import reporter as modReporter __all__ = ['check', 'checkPath', 'checkRecursive', 'iterSourceCode', 'main'] - PYTHON_SHEBANG_REGEX = re.compile(br'^#!.*\bpython[23w]?\b\s*$') @@ -39,7 +38,7 @@ def check(codeString, filename, reporter=None): reporter = modReporter._makeDefaultReporter() # First, compile into an AST and handle syntax errors. try: - tree = compile(codeString, filename, "exec", _ast.PyCF_ONLY_AST) + tree = ast.parse(codeString, filename=filename) except SyntaxError: value = sys.exc_info()[1] msg = value.args[0] @@ -71,7 +70,8 @@ def check(codeString, filename, reporter=None): reporter.unexpectedError(filename, 'problem decoding source') return 1 # Okay, it's syntactically valid. Now check it. - w = checker.Checker(tree, filename) + tokens = checker.make_tokens(codeString) + w = checker.Checker(tree, tokens=tokens, filename=filename) w.messages.sort(key=lambda m: m.lineno) for warning in w.messages: reporter.flake(warning) diff --git a/pyflakes/checker.py b/pyflakes/checker.py index a090d8a..3c4fa0f 100644 --- a/pyflakes/checker.py +++ b/pyflakes/checker.py @@ -6,9 +6,14 @@ Also, it models the Bindings and Scopes. """ import __future__ import ast +import bisect +import collections import doctest +import functools import os +import re import sys +import tokenize from pyflakes import messages @@ -23,6 +28,10 @@ except AttributeError: builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins')) +if PY2: + tokenize_tokenize = tokenize.generate_tokens +else: + tokenize_tokenize = tokenize.tokenize if PY2: def getNodeType(node_class): @@ -63,6 +72,13 @@ else: FOR_TYPES = (ast.For,) LOOP_TYPES = (ast.While, ast.For) +# https://github.com/python/typed_ast/blob/55420396/ast27/Parser/tokenizer.c#L102-L104 +TYPE_COMMENT_RE = re.compile(r'^#\s*type:\s*') +# https://github.com/python/typed_ast/blob/55420396/ast27/Parser/tokenizer.c#L1400 +TYPE_IGNORE_RE = re.compile(TYPE_COMMENT_RE.pattern + r'ignore\s*(#|$)') +# https://github.com/python/typed_ast/blob/55420396/ast27/Grammar/Grammar#L147 +TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$') + class _FieldsOrder(dict): """Fix order of AST node fields.""" @@ -522,6 +538,63 @@ def is_typing_overload(value, scope): ) +def make_tokens(code): + # PY3: tokenize.tokenize requires readline of bytes + if not isinstance(code, bytes): + code = code.encode('UTF-8') + lines = iter(code.splitlines(True)) + # next(lines, b'') is to prevent an error in pypy3 + return tuple(tokenize_tokenize(lambda: next(lines, b''))) + + +class _TypeableVisitor(ast.NodeVisitor): + """Collect the line number and nodes which are deemed typeable by + PEP 484 + + https://www.python.org/dev/peps/pep-0484/#type-comments + """ + def __init__(self): + self.typeable_lines = [] # type: List[int] + self.typeable_nodes = {} # type: Dict[int, ast.AST] + + def _typeable(self, node): + # if there is more than one typeable thing on a line last one wins + self.typeable_lines.append(node.lineno) + self.typeable_nodes[node.lineno] = node + + self.generic_visit(node) + + visit_Assign = visit_For = visit_FunctionDef = visit_With = _typeable + visit_AsyncFor = visit_AsyncFunctionDef = visit_AsyncWith = _typeable + + +def _collect_type_comments(tree, tokens): + visitor = _TypeableVisitor() + visitor.visit(tree) + + type_comments = collections.defaultdict(list) + for tp, text, start, _, _ in tokens: + if ( + tp != tokenize.COMMENT or # skip non comments + not TYPE_COMMENT_RE.match(text) or # skip non-type comments + TYPE_IGNORE_RE.match(text) # skip ignores + ): + continue + + # search for the typeable node at or before the line number of the + # type comment. + # if the bisection insertion point is before any nodes this is an + # invalid type comment which is ignored. + lineno, _ = start + idx = bisect.bisect_right(visitor.typeable_lines, lineno) + if idx == 0: + continue + node = visitor.typeable_nodes[visitor.typeable_lines[idx - 1]] + type_comments[node].append((start, text)) + + return type_comments + + class Checker(object): """ I check the cleanliness and sanity of Python code. @@ -556,8 +629,11 @@ class Checker(object): builtIns.update(_customBuiltIns.split(',')) del _customBuiltIns + # TODO: tokens= is required to perform checks on type comments, eventually + # make this a required positional argument. For now it is defaulted + # to `()` for api compatibility. def __init__(self, tree, filename='(none)', builtins=None, - withDoctest='PYFLAKES_DOCTEST' in os.environ): + withDoctest='PYFLAKES_DOCTEST' in os.environ, tokens=()): self._nodeHandlers = {} self._deferredFunctions = [] self._deferredAssignments = [] @@ -573,6 +649,7 @@ class Checker(object): raise RuntimeError('No scope implemented for the node %r' % tree) self.exceptHandlers = [()] self.root = tree + self._type_comments = _collect_type_comments(tree, tokens) for builtin in self.builtIns: self.addBinding(None, Builtin(builtin)) self.handleChildren(tree) @@ -952,7 +1029,26 @@ class Checker(object): except KeyError: self.report(messages.UndefinedName, node, name) + def _handle_type_comments(self, node): + for (lineno, col_offset), comment in self._type_comments.get(node, ()): + comment = comment.split(':', 1)[1].strip() + func_match = TYPE_FUNC_RE.match(comment) + if func_match: + parts = (func_match.group(1), func_match.group(2).strip()) + else: + parts = (comment,) + + for part in parts: + if PY2: + part = part.replace('...', 'Ellipsis') + self.deferFunction(functools.partial( + self.handleStringAnnotation, + part, node, lineno, col_offset, + messages.CommentAnnotationSyntaxError, + )) + def handleChildren(self, tree, omit=None): + self._handle_type_comments(tree) for node in iter_child_nodes(tree, omit=omit): self.handleNode(node, tree) @@ -1040,7 +1136,7 @@ class Checker(object): self.addBinding(None, Builtin('_')) for example in examples: try: - tree = compile(example.source, "", "exec", ast.PyCF_ONLY_AST) + tree = ast.parse(example.source, "") except SyntaxError: e = sys.exc_info()[1] if PYPY: @@ -1056,36 +1152,40 @@ class Checker(object): self.popScope() self.scopeStack = saved_stack - def handleAnnotation(self, annotation, node): - if isinstance(annotation, ast.Str): - # Defer handling forward annotation. - def handleForwardAnnotation(): - try: - tree = ast.parse(annotation.s) - except SyntaxError: - self.report( - messages.ForwardAnnotationSyntaxError, - node, - annotation.s, - ) - return + def handleStringAnnotation(self, s, node, ref_lineno, ref_col_offset, err): + try: + tree = ast.parse(s) + except SyntaxError: + self.report(err, node, s) + return - body = tree.body - if len(body) != 1 or not isinstance(body[0], ast.Expr): - self.report( - messages.ForwardAnnotationSyntaxError, - node, - annotation.s, - ) - return + body = tree.body + if len(body) != 1 or not isinstance(body[0], ast.Expr): + self.report(err, node, s) + return - parsed_annotation = tree.body[0].value - for descendant in ast.walk(parsed_annotation): - ast.copy_location(descendant, annotation) + parsed_annotation = tree.body[0].value + for descendant in ast.walk(parsed_annotation): + if ( + 'lineno' in descendant._attributes and + 'col_offset' in descendant._attributes + ): + descendant.lineno = ref_lineno + descendant.col_offset = ref_col_offset - self.handleNode(parsed_annotation, node) + self.handleNode(parsed_annotation, node) - self.deferFunction(handleForwardAnnotation) + def handleAnnotation(self, annotation, node): + if isinstance(annotation, ast.Str): + # Defer handling forward annotation. + self.deferFunction(functools.partial( + self.handleStringAnnotation, + annotation.s, + node, + annotation.lineno, + annotation.col_offset, + messages.ForwardAnnotationSyntaxError, + )) elif self.annotationsFutureEnabled: self.deferFunction(lambda: self.handleNode(annotation, node)) else: diff --git a/pyflakes/messages.py b/pyflakes/messages.py index 4756872..4d3c7d7 100644 --- a/pyflakes/messages.py +++ b/pyflakes/messages.py @@ -248,6 +248,14 @@ class ForwardAnnotationSyntaxError(Message): self.message_args = (annotation,) +class CommentAnnotationSyntaxError(Message): + message = 'syntax error in type comment %r' + + def __init__(self, filename, loc, annotation): + Message.__init__(self, filename, loc) + self.message_args = (annotation,) + + class RaiseNotImplemented(Message): message = "'raise NotImplemented' should be 'raise NotImplementedError'" diff --git a/pyflakes/test/harness.py b/pyflakes/test/harness.py index 0a58bd5..d375ea3 100644 --- a/pyflakes/test/harness.py +++ b/pyflakes/test/harness.py @@ -1,4 +1,4 @@ - +import ast import textwrap import unittest @@ -8,7 +8,6 @@ __all__ = ['TestCase', 'skip', 'skipIf'] skip = unittest.skip skipIf = unittest.skipIf -PyCF_ONLY_AST = 1024 class TestCase(unittest.TestCase): @@ -16,11 +15,12 @@ class TestCase(unittest.TestCase): withDoctest = False def flakes(self, input, *expectedOutputs, **kw): - tree = compile(textwrap.dedent(input), "", "exec", PyCF_ONLY_AST) + tree = ast.parse(textwrap.dedent(input)) + tokens = checker.make_tokens(textwrap.dedent(input)) if kw.get('is_segment'): tree = tree.body[0] kw.pop('is_segment') - w = checker.Checker(tree, withDoctest=self.withDoctest, **kw) + w = checker.Checker(tree, tokens=tokens, withDoctest=self.withDoctest, **kw) outputs = [type(o) for o in w.messages] expectedOutputs = list(expectedOutputs) outputs.sort(key=lambda t: t.__name__) diff --git a/pyflakes/test/test_checker.py b/pyflakes/test/test_checker.py new file mode 100644 index 0000000..f47588d --- /dev/null +++ b/pyflakes/test/test_checker.py @@ -0,0 +1,186 @@ +import ast +import sys + +from pyflakes import checker +from pyflakes.test.harness import TestCase, skipIf + + +class TypeableVisitorTests(TestCase): + """ + Tests of L{_TypeableVisitor} + """ + + @staticmethod + def _run_visitor(s): + """ + Run L{_TypeableVisitor} on the parsed source and return the visitor. + """ + tree = ast.parse(s) + visitor = checker._TypeableVisitor() + visitor.visit(tree) + return visitor + + def test_node_types(self): + """ + Test that the typeable node types are collected + """ + visitor = self._run_visitor( + """\ +x = 1 # assignment +for x in range(1): pass # for loop +def f(): pass # function definition +with a as b: pass # with statement +""" + ) + self.assertEqual(visitor.typeable_lines, [1, 2, 3, 4]) + self.assertIsInstance(visitor.typeable_nodes[1], ast.Assign) + self.assertIsInstance(visitor.typeable_nodes[2], ast.For) + self.assertIsInstance(visitor.typeable_nodes[3], ast.FunctionDef) + self.assertIsInstance(visitor.typeable_nodes[4], ast.With) + + def test_visitor_recurses(self): + """ + Test the common pitfall of missing `generic_visit` in visitors by + ensuring that nested nodes are reported + """ + visitor = self._run_visitor( + """\ +def f(): + x = 1 +""" + ) + self.assertEqual(visitor.typeable_lines, [1, 2]) + self.assertIsInstance(visitor.typeable_nodes[1], ast.FunctionDef) + self.assertIsInstance(visitor.typeable_nodes[2], ast.Assign) + + @skipIf(sys.version_info < (3, 5), 'async syntax introduced in py35') + def test_py35_node_types(self): + """ + Test that the PEP 492 node types are collected + """ + visitor = self._run_visitor( + """\ +async def f(): # async def + async for x in y: pass # async for + async with a as b: pass # async with +""" + ) + self.assertEqual(visitor.typeable_lines, [1, 2, 3]) + self.assertIsInstance(visitor.typeable_nodes[1], ast.AsyncFunctionDef) + self.assertIsInstance(visitor.typeable_nodes[2], ast.AsyncFor) + self.assertIsInstance(visitor.typeable_nodes[3], ast.AsyncWith) + + def test_last_node_wins(self): + """ + Test that when two typeable nodes are present on a line, the last + typeable one wins. + """ + visitor = self._run_visitor('x = 1; y = 1') + # detected both assignable nodes + self.assertEqual(visitor.typeable_lines, [1, 1]) + # but the assignment to `y` wins + self.assertEqual(visitor.typeable_nodes[1].targets[0].id, 'y') + + +class CollectTypeCommentsTests(TestCase): + """ + Tests of L{_collect_type_comments} + """ + + @staticmethod + def _collect(s): + """ + Run L{_collect_type_comments} on the parsed source and return the + mapping from nodes to comments. The return value is converted to + a set: {(node_type, tuple of comments), ...} + """ + tree = ast.parse(s) + tokens = checker.make_tokens(s) + ret = checker._collect_type_comments(tree, tokens) + return {(type(k), tuple(s for _, s in v)) for k, v in ret.items()} + + def test_bytes(self): + """ + Test that the function works for binary source + """ + ret = self._collect(b'x = 1 # type: int') + self.assertSetEqual(ret, {(ast.Assign, ('# type: int',))}) + + def test_text(self): + """ + Test that the function works for text source + """ + ret = self._collect(u'x = 1 # type: int') + self.assertEqual(ret, {(ast.Assign, ('# type: int',))}) + + def test_non_type_comment_ignored(self): + """ + Test that a non-type comment is ignored + """ + ret = self._collect('x = 1 # noqa') + self.assertSetEqual(ret, set()) + + def test_type_comment_before_typeable(self): + """ + Test that a type comment before something typeable is ignored. + """ + ret = self._collect('# type: int\nx = 1') + self.assertSetEqual(ret, set()) + + def test_type_ignore_comment_ignored(self): + """ + Test that `# type: ignore` comments are not collected. + """ + ret = self._collect('x = 1 # type: ignore') + self.assertSetEqual(ret, set()) + + def test_type_ignore_with_other_things_ignored(self): + """ + Test that `# type: ignore` comments with more content are also not + collected. + """ + ret = self._collect('x = 1 # type: ignore # noqa') + self.assertSetEqual(ret, set()) + ret = self._collect('x = 1 #type:ignore#noqa') + self.assertSetEqual(ret, set()) + + def test_type_comment_with_extra_still_collected(self): + ret = self._collect('x = 1 # type: int # noqa') + self.assertSetEqual(ret, {(ast.Assign, ('# type: int # noqa',))}) + + def test_type_comment_without_whitespace(self): + ret = self._collect('x = 1 #type:int') + self.assertSetEqual(ret, {(ast.Assign, ('#type:int',))}) + + def test_type_comment_starts_with_word_ignore(self): + ret = self._collect('x = 1 # type: ignore[T]') + self.assertSetEqual(ret, {(ast.Assign, ('# type: ignore[T]',))}) + + def test_last_node_wins(self): + """ + Test that when two typeable nodes are present on a line, the last + typeable one wins. + """ + ret = self._collect('def f(): x = 1 # type: int') + self.assertSetEqual(ret, {(ast.Assign, ('# type: int',))}) + + def test_function_def_assigned_comments(self): + """ + Test that type comments for function arguments are all attributed to + the function definition. + """ + ret = self._collect( + """\ +def f( + a, # type: int + b, # type: str +): + # type: (...) -> None + pass +""" + ) + expected = {( + ast.FunctionDef, + ('# type: int', '# type: str', '# type: (...) -> None'), + )} + self.assertSetEqual(ret, expected) diff --git a/pyflakes/test/test_other.py b/pyflakes/test/test_other.py index d0fff0a..f75b3d9 100644 --- a/pyflakes/test/test_other.py +++ b/pyflakes/test/test_other.py @@ -2073,6 +2073,72 @@ class TestAsyncStatements(TestCase): class B: pass ''', m.UndefinedName) + def test_typeCommentsMarkImportsAsUsed(self): + self.flakes(""" + from mod import A, B, C, D, E, F, G + + + def f( + a, # type: A + ): + # type: (...) -> B + for b in a: # type: C + with b as c: # type: D + d = c.x # type: E + return d + + + def g(x): # type: (F) -> G + return x.y + """) + + def test_typeCommentsFullSignature(self): + self.flakes(""" + from mod import A, B, C, D + def f(a, b): + # type: (A, B[C]) -> D + return a + b + """) + + def test_typeCommentsAdditionalComemnt(self): + self.flakes(""" + from mod import F + + x = 1 # type: F # noqa + """) + + def test_typeCommentsNoWhitespaceAnnotation(self): + self.flakes(""" + from mod import F + + x = 1 #type:F + """) + + def test_typeCommentsInvalidDoesNotMarkAsUsed(self): + self.flakes(""" + from mod import F + + # type: F + """, m.UnusedImport) + + def test_typeCommentsSyntaxError(self): + self.flakes(""" + def f(x): # type: (F[) -> None + pass + """, m.CommentAnnotationSyntaxError) + + def test_typeCommentsAssignedToPreviousNode(self): + # This test demonstrates an issue in the implementation which + # associates the type comment with a node above it, however the type + # comment isn't valid according to mypy. If an improved approach + # which can detect these "invalid" type comments is implemented, this + # test should be removed / improved to assert that new check. + self.flakes(""" + from mod import F + x = 1 + # type: F + """) + def test_raise_notimplemented(self): self.flakes(''' raise NotImplementedError("This is fine") diff --git a/pyflakes/test/test_undefined_names.py b/pyflakes/test/test_undefined_names.py index 222ffce..25e28dd 100644 --- a/pyflakes/test/test_undefined_names.py +++ b/pyflakes/test/test_undefined_names.py @@ -1,5 +1,4 @@ - -from _ast import PyCF_ONLY_AST +import ast from sys import version_info from pyflakes import messages as m, checker @@ -848,7 +847,8 @@ class NameTests(TestCase): A Name node with an unrecognized context results in a RuntimeError being raised. """ - tree = compile("x = 10", "", "exec", PyCF_ONLY_AST) + tree = ast.parse("x = 10") + tokens = checker.make_tokens("x = 10") # Make it into something unrecognizable. tree.body[0].targets[0].ctx = object() - self.assertRaises(RuntimeError, checker.Checker, tree) + self.assertRaises(RuntimeError, checker.Checker, tree, tokens=tokens) diff --git a/tox.ini b/tox.ini index 3a07d70..5821483 100644 --- a/tox.ini +++ b/tox.ini @@ -6,11 +6,9 @@ envlist = [testenv] deps = flake8==3.6.0 commands = - python setup.py test -q + python -m unittest discover pyflakes flake8 pyflakes setup.py [flake8] -select = E,F,W -ignore = W504 builtins = unicode max_line_length = 89 -- cgit v1.2.1