author     Anthony Sottile <asottile@umich.edu>                  2018-12-31 22:29:32 -0800
committer  Ian Stapleton Cordasco <graffatcolmingov@gmail.com>   2019-01-04 12:11:38 -0600
commit     ac0aeba0eddb0b8e2285f5ac568e4621426b3aa7 (patch)
tree       04f307a971c6de910c5b8e0498d35f580a35473c
parent     009f4e7c21890cefafe5858aa682a7e7f17d6a80 (diff)
download   pyflakes-ac0aeba0eddb0b8e2285f5ac568e4621426b3aa7.tar.gz
Parse PEP 484 type comments
-rw-r--r--  .appveyor.yml                          |   4
-rw-r--r--  .travis.yml                            |   8
-rw-r--r--  pyflakes/api.py                        |  10
-rw-r--r--  pyflakes/checker.py                    | 156
-rw-r--r--  pyflakes/messages.py                   |   8
-rw-r--r--  pyflakes/test/harness.py               |   8
-rw-r--r--  pyflakes/test/test_checker.py          | 186
-rw-r--r--  pyflakes/test/test_other.py            |  66
-rw-r--r--  pyflakes/test/test_undefined_names.py  |   8
-rw-r--r--  tox.ini                                |   4
10 files changed, 406 insertions, 52 deletions
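
For context, PEP 484 type comments are the Python-2-compatible spelling of annotations that this patch teaches the checker to parse. The snippet below is illustrative only (it is not part of the patch) and shows the two comment forms the new code handles: a per-statement annotation and a full function-signature annotation.

    # Illustrative only -- the two PEP 484 comment forms the patch recognizes.
    from typing import Dict, List


    def scale(values, factor):
        # type: (List[float], float) -> List[float]
        return [v * factor for v in values]


    counts = {}  # type: Dict[str, int]
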
diff --git a/.appveyor.yml b/.appveyor.yml
index 5739e3b..f7d7aa2 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -44,5 +44,5 @@ build: off
test_script:
- python -m tox
-# - C:\pypy3-v5.10.0-win32\pypy3 setup.py test -q
- - C:\pypy-2.6.1-win32\pypy setup.py test -q
+# - C:\pypy3-v5.10.0-win32\pypy3 -m unittest discover pyflakes
+ - C:\pypy-2.6.1-win32\pypy -m unittest discover pyflakes
diff --git a/.travis.yml b/.travis.yml
index 5d92e4f..20e195d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,9 +13,5 @@ matrix:
dist: xenial
- python: nightly
dist: xenial
-install:
- - pip install --upgrade .
- - pip list
-script:
- - python setup.py test -q
- - if [ "$TRAVIS_PYTHON_VERSION" != "nightly" ]; then pip install flake8==3.6.0 && flake8 --version && flake8 pyflakes setup.py; fi
+install: pip install tox
+script: tox -e py
diff --git a/pyflakes/api.py b/pyflakes/api.py
index 3ddb386..72d6b6c 100644
--- a/pyflakes/api.py
+++ b/pyflakes/api.py
@@ -3,18 +3,17 @@ API for the command-line I{pyflakes} tool.
"""
from __future__ import with_statement
-import sys
+import ast
import os
import platform
import re
-import _ast
+import sys
from pyflakes import checker, __version__
from pyflakes import reporter as modReporter
__all__ = ['check', 'checkPath', 'checkRecursive', 'iterSourceCode', 'main']
-
PYTHON_SHEBANG_REGEX = re.compile(br'^#!.*\bpython[23w]?\b\s*$')
@@ -39,7 +38,7 @@ def check(codeString, filename, reporter=None):
reporter = modReporter._makeDefaultReporter()
# First, compile into an AST and handle syntax errors.
try:
- tree = compile(codeString, filename, "exec", _ast.PyCF_ONLY_AST)
+ tree = ast.parse(codeString, filename=filename)
except SyntaxError:
value = sys.exc_info()[1]
msg = value.args[0]
@@ -71,7 +70,8 @@ def check(codeString, filename, reporter=None):
reporter.unexpectedError(filename, 'problem decoding source')
return 1
# Okay, it's syntactically valid. Now check it.
- w = checker.Checker(tree, filename)
+ tokens = checker.make_tokens(codeString)
+ w = checker.Checker(tree, tokens=tokens, filename=filename)
w.messages.sort(key=lambda m: m.lineno)
for warning in w.messages:
reporter.flake(warning)
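
The api.py hunk above is the whole integration point: build the token stream with checker.make_tokens() and pass it to the Checker alongside the AST. A minimal sketch of driving the checker directly in the same way (the source string here is made up for illustration):

    import ast

    from pyflakes import checker

    source = b"import os\nx = 1  # type: int\n"        # hypothetical input
    tree = ast.parse(source, filename='<example>')
    tokens = checker.make_tokens(source)
    w = checker.Checker(tree, tokens=tokens, filename='<example>')
    for warning in sorted(w.messages, key=lambda m: m.lineno):
        print(warning)                                  # reports the unused import
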
diff --git a/pyflakes/checker.py b/pyflakes/checker.py
index a090d8a..3c4fa0f 100644
--- a/pyflakes/checker.py
+++ b/pyflakes/checker.py
@@ -6,9 +6,14 @@ Also, it models the Bindings and Scopes.
"""
import __future__
import ast
+import bisect
+import collections
import doctest
+import functools
import os
+import re
import sys
+import tokenize
from pyflakes import messages
@@ -23,6 +28,10 @@ except AttributeError:
builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))
+if PY2:
+ tokenize_tokenize = tokenize.generate_tokens
+else:
+ tokenize_tokenize = tokenize.tokenize
if PY2:
def getNodeType(node_class):
@@ -63,6 +72,13 @@ else:
FOR_TYPES = (ast.For,)
LOOP_TYPES = (ast.While, ast.For)
+# https://github.com/python/typed_ast/blob/55420396/ast27/Parser/tokenizer.c#L102-L104
+TYPE_COMMENT_RE = re.compile(r'^#\s*type:\s*')
+# https://github.com/python/typed_ast/blob/55420396/ast27/Parser/tokenizer.c#L1400
+TYPE_IGNORE_RE = re.compile(TYPE_COMMENT_RE.pattern + r'ignore\s*(#|$)')
+# https://github.com/python/typed_ast/blob/55420396/ast27/Grammar/Grammar#L147
+TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')
+
class _FieldsOrder(dict):
"""Fix order of AST node fields."""
@@ -522,6 +538,63 @@ def is_typing_overload(value, scope):
)
+def make_tokens(code):
+ # PY3: tokenize.tokenize requires readline of bytes
+ if not isinstance(code, bytes):
+ code = code.encode('UTF-8')
+ lines = iter(code.splitlines(True))
+ # next(lines, b'') is to prevent an error in pypy3
+ return tuple(tokenize_tokenize(lambda: next(lines, b'')))
+
+
+class _TypeableVisitor(ast.NodeVisitor):
+ """Collect the line number and nodes which are deemed typeable by
+ PEP 484
+
+ https://www.python.org/dev/peps/pep-0484/#type-comments
+ """
+ def __init__(self):
+ self.typeable_lines = [] # type: List[int]
+ self.typeable_nodes = {} # type: Dict[int, ast.AST]
+
+ def _typeable(self, node):
+ # if there is more than one typeable thing on a line last one wins
+ self.typeable_lines.append(node.lineno)
+ self.typeable_nodes[node.lineno] = node
+
+ self.generic_visit(node)
+
+ visit_Assign = visit_For = visit_FunctionDef = visit_With = _typeable
+ visit_AsyncFor = visit_AsyncFunctionDef = visit_AsyncWith = _typeable
+
+
+def _collect_type_comments(tree, tokens):
+ visitor = _TypeableVisitor()
+ visitor.visit(tree)
+
+ type_comments = collections.defaultdict(list)
+ for tp, text, start, _, _ in tokens:
+ if (
+ tp != tokenize.COMMENT or # skip non comments
+ not TYPE_COMMENT_RE.match(text) or # skip non-type comments
+ TYPE_IGNORE_RE.match(text) # skip ignores
+ ):
+ continue
+
+ # search for the typeable node at or before the line number of the
+ # type comment.
+ # if the bisection insertion point is before any nodes this is an
+ # invalid type comment which is ignored.
+ lineno, _ = start
+ idx = bisect.bisect_right(visitor.typeable_lines, lineno)
+ if idx == 0:
+ continue
+ node = visitor.typeable_nodes[visitor.typeable_lines[idx - 1]]
+ type_comments[node].append((start, text))
+
+ return type_comments
+
+
class Checker(object):
"""
I check the cleanliness and sanity of Python code.
@@ -556,8 +629,11 @@ class Checker(object):
builtIns.update(_customBuiltIns.split(','))
del _customBuiltIns
+ # TODO: tokens= is required to perform checks on type comments, eventually
+ # make this a required positional argument. For now it is defaulted
+ # to `()` for api compatibility.
def __init__(self, tree, filename='(none)', builtins=None,
- withDoctest='PYFLAKES_DOCTEST' in os.environ):
+ withDoctest='PYFLAKES_DOCTEST' in os.environ, tokens=()):
self._nodeHandlers = {}
self._deferredFunctions = []
self._deferredAssignments = []
@@ -573,6 +649,7 @@ class Checker(object):
raise RuntimeError('No scope implemented for the node %r' % tree)
self.exceptHandlers = [()]
self.root = tree
+ self._type_comments = _collect_type_comments(tree, tokens)
for builtin in self.builtIns:
self.addBinding(None, Builtin(builtin))
self.handleChildren(tree)
@@ -952,7 +1029,26 @@ class Checker(object):
except KeyError:
self.report(messages.UndefinedName, node, name)
+ def _handle_type_comments(self, node):
+ for (lineno, col_offset), comment in self._type_comments.get(node, ()):
+ comment = comment.split(':', 1)[1].strip()
+ func_match = TYPE_FUNC_RE.match(comment)
+ if func_match:
+ parts = (func_match.group(1), func_match.group(2).strip())
+ else:
+ parts = (comment,)
+
+ for part in parts:
+ if PY2:
+ part = part.replace('...', 'Ellipsis')
+ self.deferFunction(functools.partial(
+ self.handleStringAnnotation,
+ part, node, lineno, col_offset,
+ messages.CommentAnnotationSyntaxError,
+ ))
+
def handleChildren(self, tree, omit=None):
+ self._handle_type_comments(tree)
for node in iter_child_nodes(tree, omit=omit):
self.handleNode(node, tree)
@@ -1040,7 +1136,7 @@ class Checker(object):
self.addBinding(None, Builtin('_'))
for example in examples:
try:
- tree = compile(example.source, "<doctest>", "exec", ast.PyCF_ONLY_AST)
+ tree = ast.parse(example.source, "<doctest>")
except SyntaxError:
e = sys.exc_info()[1]
if PYPY:
@@ -1056,36 +1152,40 @@ class Checker(object):
self.popScope()
self.scopeStack = saved_stack
- def handleAnnotation(self, annotation, node):
- if isinstance(annotation, ast.Str):
- # Defer handling forward annotation.
- def handleForwardAnnotation():
- try:
- tree = ast.parse(annotation.s)
- except SyntaxError:
- self.report(
- messages.ForwardAnnotationSyntaxError,
- node,
- annotation.s,
- )
- return
+ def handleStringAnnotation(self, s, node, ref_lineno, ref_col_offset, err):
+ try:
+ tree = ast.parse(s)
+ except SyntaxError:
+ self.report(err, node, s)
+ return
- body = tree.body
- if len(body) != 1 or not isinstance(body[0], ast.Expr):
- self.report(
- messages.ForwardAnnotationSyntaxError,
- node,
- annotation.s,
- )
- return
+ body = tree.body
+ if len(body) != 1 or not isinstance(body[0], ast.Expr):
+ self.report(err, node, s)
+ return
- parsed_annotation = tree.body[0].value
- for descendant in ast.walk(parsed_annotation):
- ast.copy_location(descendant, annotation)
+ parsed_annotation = tree.body[0].value
+ for descendant in ast.walk(parsed_annotation):
+ if (
+ 'lineno' in descendant._attributes and
+ 'col_offset' in descendant._attributes
+ ):
+ descendant.lineno = ref_lineno
+ descendant.col_offset = ref_col_offset
- self.handleNode(parsed_annotation, node)
+ self.handleNode(parsed_annotation, node)
- self.deferFunction(handleForwardAnnotation)
+ def handleAnnotation(self, annotation, node):
+ if isinstance(annotation, ast.Str):
+ # Defer handling forward annotation.
+ self.deferFunction(functools.partial(
+ self.handleStringAnnotation,
+ annotation.s,
+ node,
+ annotation.lineno,
+ annotation.col_offset,
+ messages.ForwardAnnotationSyntaxError,
+ ))
elif self.annotationsFutureEnabled:
self.deferFunction(lambda: self.handleNode(annotation, node))
else:
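
How _collect_type_comments pairs a comment with a node is the least obvious part of the hunk above; the standalone toy example below (made-up line numbers) shows the bisect_right lookup it performs: a type comment attaches to the closest typeable node at or before its own line, and a comment that precedes every typeable node is dropped.

    import bisect

    typeable_lines = [1, 4, 9]   # lines holding Assign/For/FunctionDef/With nodes
    comment_lineno = 6           # line of a "# type: ..." comment found via tokenize

    idx = bisect.bisect_right(typeable_lines, comment_lineno)
    if idx == 0:
        print('comment precedes every typeable node; ignored')
    else:
        print('comment attaches to the node on line', typeable_lines[idx - 1])  # -> 4
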
diff --git a/pyflakes/messages.py b/pyflakes/messages.py
index 4756872..4d3c7d7 100644
--- a/pyflakes/messages.py
+++ b/pyflakes/messages.py
@@ -248,6 +248,14 @@ class ForwardAnnotationSyntaxError(Message):
self.message_args = (annotation,)
+class CommentAnnotationSyntaxError(Message):
+ message = 'syntax error in type comment %r'
+
+ def __init__(self, filename, loc, annotation):
+ Message.__init__(self, filename, loc)
+ self.message_args = (annotation,)
+
+
class RaiseNotImplemented(Message):
message = "'raise NotImplemented' should be 'raise NotImplementedError'"
diff --git a/pyflakes/test/harness.py b/pyflakes/test/harness.py
index 0a58bd5..d375ea3 100644
--- a/pyflakes/test/harness.py
+++ b/pyflakes/test/harness.py
@@ -1,4 +1,4 @@
-
+import ast
import textwrap
import unittest
@@ -8,7 +8,6 @@ __all__ = ['TestCase', 'skip', 'skipIf']
skip = unittest.skip
skipIf = unittest.skipIf
-PyCF_ONLY_AST = 1024
class TestCase(unittest.TestCase):
@@ -16,11 +15,12 @@ class TestCase(unittest.TestCase):
withDoctest = False
def flakes(self, input, *expectedOutputs, **kw):
- tree = compile(textwrap.dedent(input), "<test>", "exec", PyCF_ONLY_AST)
+ tree = ast.parse(textwrap.dedent(input))
+ tokens = checker.make_tokens(textwrap.dedent(input))
if kw.get('is_segment'):
tree = tree.body[0]
kw.pop('is_segment')
- w = checker.Checker(tree, withDoctest=self.withDoctest, **kw)
+ w = checker.Checker(tree, tokens=tokens, withDoctest=self.withDoctest, **kw)
outputs = [type(o) for o in w.messages]
expectedOutputs = list(expectedOutputs)
outputs.sort(key=lambda t: t.__name__)
diff --git a/pyflakes/test/test_checker.py b/pyflakes/test/test_checker.py
new file mode 100644
index 0000000..f47588d
--- /dev/null
+++ b/pyflakes/test/test_checker.py
@@ -0,0 +1,186 @@
+import ast
+import sys
+
+from pyflakes import checker
+from pyflakes.test.harness import TestCase, skipIf
+
+
+class TypeableVisitorTests(TestCase):
+ """
+ Tests of L{_TypeableVisitor}
+ """
+
+ @staticmethod
+ def _run_visitor(s):
+ """
+ Run L{_TypeableVisitor} on the parsed source and return the visitor.
+ """
+ tree = ast.parse(s)
+ visitor = checker._TypeableVisitor()
+ visitor.visit(tree)
+ return visitor
+
+ def test_node_types(self):
+ """
+ Test that the typeable node types are collected
+ """
+ visitor = self._run_visitor(
+ """\
+x = 1 # assignment
+for x in range(1): pass # for loop
+def f(): pass # function definition
+with a as b: pass # with statement
+"""
+ )
+ self.assertEqual(visitor.typeable_lines, [1, 2, 3, 4])
+ self.assertIsInstance(visitor.typeable_nodes[1], ast.Assign)
+ self.assertIsInstance(visitor.typeable_nodes[2], ast.For)
+ self.assertIsInstance(visitor.typeable_nodes[3], ast.FunctionDef)
+ self.assertIsInstance(visitor.typeable_nodes[4], ast.With)
+
+ def test_visitor_recurses(self):
+ """
+ Test the common pitfall of missing `generic_visit` in visitors by
+ ensuring that nested nodes are reported
+ """
+ visitor = self._run_visitor(
+ """\
+def f():
+ x = 1
+"""
+ )
+ self.assertEqual(visitor.typeable_lines, [1, 2])
+ self.assertIsInstance(visitor.typeable_nodes[1], ast.FunctionDef)
+ self.assertIsInstance(visitor.typeable_nodes[2], ast.Assign)
+
+ @skipIf(sys.version_info < (3, 5), 'async syntax introduced in py35')
+ def test_py35_node_types(self):
+ """
+ Test that the PEP 492 node types are collected
+ """
+ visitor = self._run_visitor(
+ """\
+async def f(): # async def
+ async for x in y: pass # async for
+ async with a as b: pass # async with
+"""
+ )
+ self.assertEqual(visitor.typeable_lines, [1, 2, 3])
+ self.assertIsInstance(visitor.typeable_nodes[1], ast.AsyncFunctionDef)
+ self.assertIsInstance(visitor.typeable_nodes[2], ast.AsyncFor)
+ self.assertIsInstance(visitor.typeable_nodes[3], ast.AsyncWith)
+
+ def test_last_node_wins(self):
+ """
+ Test that when two typeable nodes are present on a line, the last
+ typeable one wins.
+ """
+ visitor = self._run_visitor('x = 1; y = 1')
+ # detected both assignable nodes
+ self.assertEqual(visitor.typeable_lines, [1, 1])
+ # but the assignment to `y` wins
+ self.assertEqual(visitor.typeable_nodes[1].targets[0].id, 'y')
+
+
+class CollectTypeCommentsTests(TestCase):
+ """
+ Tests of L{_collect_type_comments}
+ """
+
+ @staticmethod
+ def _collect(s):
+ """
+ Run L{_collect_type_comments} on the parsed source and return the
+ mapping from nodes to comments. The return value is converted to
+ a set: {(node_type, tuple of comments), ...}
+ """
+ tree = ast.parse(s)
+ tokens = checker.make_tokens(s)
+ ret = checker._collect_type_comments(tree, tokens)
+ return {(type(k), tuple(s for _, s in v)) for k, v in ret.items()}
+
+ def test_bytes(self):
+ """
+ Test that the function works for binary source
+ """
+ ret = self._collect(b'x = 1 # type: int')
+ self.assertSetEqual(ret, {(ast.Assign, ('# type: int',))})
+
+ def test_text(self):
+ """
+ Test that the function works for text source
+ """
+ ret = self._collect(u'x = 1 # type: int')
+ self.assertEqual(ret, {(ast.Assign, ('# type: int',))})
+
+ def test_non_type_comment_ignored(self):
+ """
+ Test that a non-type comment is ignored
+ """
+ ret = self._collect('x = 1 # noqa')
+ self.assertSetEqual(ret, set())
+
+ def test_type_comment_before_typeable(self):
+ """
+ Test that a type comment before something typeable is ignored.
+ """
+ ret = self._collect('# type: int\nx = 1')
+ self.assertSetEqual(ret, set())
+
+ def test_type_ignore_comment_ignored(self):
+ """
+ Test that `# type: ignore` comments are not collected.
+ """
+ ret = self._collect('x = 1 # type: ignore')
+ self.assertSetEqual(ret, set())
+
+ def test_type_ignore_with_other_things_ignored(self):
+ """
+ Test that `# type: ignore` comments with more content are also not
+ collected.
+ """
+ ret = self._collect('x = 1 # type: ignore # noqa')
+ self.assertSetEqual(ret, set())
+ ret = self._collect('x = 1 #type:ignore#noqa')
+ self.assertSetEqual(ret, set())
+
+ def test_type_comment_with_extra_still_collected(self):
+ ret = self._collect('x = 1 # type: int # noqa')
+ self.assertSetEqual(ret, {(ast.Assign, ('# type: int # noqa',))})
+
+ def test_type_comment_without_whitespace(self):
+ ret = self._collect('x = 1 #type:int')
+ self.assertSetEqual(ret, {(ast.Assign, ('#type:int',))})
+
+ def test_type_comment_starts_with_word_ignore(self):
+ ret = self._collect('x = 1 # type: ignore[T]')
+ self.assertSetEqual(ret, {(ast.Assign, ('# type: ignore[T]',))})
+
+ def test_last_node_wins(self):
+ """
+ Test that when two typeable nodes are present on a line, the last
+ typeable one wins.
+ """
+ ret = self._collect('def f(): x = 1 # type: int')
+ self.assertSetEqual(ret, {(ast.Assign, ('# type: int',))})
+
+ def test_function_def_assigned_comments(self):
+ """
+ Test that type comments for function arguments are all attributed to
+ the function definition.
+ """
+ ret = self._collect(
+ """\
+def f(
+ a, # type: int
+ b, # type: str
+):
+ # type: (...) -> None
+ pass
+"""
+ )
+ expected = {(
+ ast.FunctionDef,
+ ('# type: int', '# type: str', '# type: (...) -> None'),
+ )}
+ self.assertSetEqual(ret, expected)
diff --git a/pyflakes/test/test_other.py b/pyflakes/test/test_other.py
index d0fff0a..f75b3d9 100644
--- a/pyflakes/test/test_other.py
+++ b/pyflakes/test/test_other.py
@@ -2073,6 +2073,72 @@ class TestAsyncStatements(TestCase):
class B: pass
''', m.UndefinedName)
+ def test_typeCommentsMarkImportsAsUsed(self):
+ self.flakes("""
+ from mod import A, B, C, D, E, F, G
+
+
+ def f(
+ a, # type: A
+ ):
+ # type: (...) -> B
+ for b in a: # type: C
+ with b as c: # type: D
+ d = c.x # type: E
+ return d
+
+
+ def g(x): # type: (F) -> G
+ return x.y
+ """)
+
+ def test_typeCommentsFullSignature(self):
+ self.flakes("""
+ from mod import A, B, C, D
+ def f(a, b):
+ # type: (A, B[C]) -> D
+ return a + b
+ """)
+
+ def test_typeCommentsAdditionalComment(self):
+ self.flakes("""
+ from mod import F
+
+ x = 1 # type: F # noqa
+ """)
+
+ def test_typeCommentsNoWhitespaceAnnotation(self):
+ self.flakes("""
+ from mod import F
+
+ x = 1 #type:F
+ """)
+
+ def test_typeCommentsInvalidDoesNotMarkAsUsed(self):
+ self.flakes("""
+ from mod import F
+
+ # type: F
+ """, m.UnusedImport)
+
+ def test_typeCommentsSyntaxError(self):
+ self.flakes("""
+ def f(x): # type: (F[) -> None
+ pass
+ """, m.CommentAnnotationSyntaxError)
+
+ def test_typeCommentsAssignedToPreviousNode(self):
+ # This test demonstrates an issue in the implementation which
+ # associates the type comment with a node above it, however the type
+ # comment isn't valid according to mypy. If an improved approach
+ # which can detect these "invalid" type comments is implemented, this
+ # test should be removed / improved to assert that new check.
+ self.flakes("""
+ from mod import F
+ x = 1
+ # type: F
+ """)
+
def test_raise_notimplemented(self):
self.flakes('''
raise NotImplementedError("This is fine")
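
As exercised by test_typeCommentsSyntaxError above, a malformed signature comment now surfaces through the public API as a CommentAnnotationSyntaxError flake. A small usage sketch, assuming this patch is installed:

    import sys

    from pyflakes.api import check
    from pyflakes.reporter import Reporter

    source = (
        "def f(x):  # type: (F[) -> None\n"
        "    pass\n"
    )
    check(source, '<example>', Reporter(sys.stdout, sys.stderr))
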
diff --git a/pyflakes/test/test_undefined_names.py b/pyflakes/test/test_undefined_names.py
index 222ffce..25e28dd 100644
--- a/pyflakes/test/test_undefined_names.py
+++ b/pyflakes/test/test_undefined_names.py
@@ -1,5 +1,4 @@
-
-from _ast import PyCF_ONLY_AST
+import ast
from sys import version_info
from pyflakes import messages as m, checker
@@ -848,7 +847,8 @@ class NameTests(TestCase):
A Name node with an unrecognized context results in a RuntimeError being
raised.
"""
- tree = compile("x = 10", "<test>", "exec", PyCF_ONLY_AST)
+ tree = ast.parse("x = 10")
+ tokens = checker.make_tokens("x = 10")
# Make it into something unrecognizable.
tree.body[0].targets[0].ctx = object()
- self.assertRaises(RuntimeError, checker.Checker, tree)
+ self.assertRaises(RuntimeError, checker.Checker, tree, tokens=tokens)
diff --git a/tox.ini b/tox.ini
index 3a07d70..5821483 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,11 +6,9 @@ envlist =
[testenv]
deps = flake8==3.6.0
commands =
- python setup.py test -q
+ python -m unittest discover pyflakes
flake8 pyflakes setup.py
[flake8]
-select = E,F,W
-ignore = W504
builtins = unicode
max_line_length = 89