From ac0aeba0eddb0b8e2285f5ac568e4621426b3aa7 Mon Sep 17 00:00:00 2001
From: Anthony Sottile <asottile@umich.edu>
Date: Mon, 31 Dec 2018 22:29:32 -0800
Subject: Parse PEP 484 type comments

---
 .appveyor.yml                         |   4 +-
 .travis.yml                           |   8 +-
 pyflakes/api.py                       |  10 +-
 pyflakes/checker.py                   | 156 +++++++++++++++++++++++-----
 pyflakes/messages.py                  |   8 ++
 pyflakes/test/harness.py              |   8 +-
 pyflakes/test/test_checker.py         | 186 ++++++++++++++++++++++++++++++++++
 pyflakes/test/test_other.py           |  66 ++++++++++++
 pyflakes/test/test_undefined_names.py |   8 +-
 tox.ini                               |   4 +-
 10 files changed, 406 insertions(+), 52 deletions(-)
 create mode 100644 pyflakes/test/test_checker.py

diff --git a/.appveyor.yml b/.appveyor.yml
index 5739e3b..f7d7aa2 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -44,5 +44,5 @@ build: off
 
 test_script:
   - python -m tox
-#  - C:\pypy3-v5.10.0-win32\pypy3 setup.py test -q
-  - C:\pypy-2.6.1-win32\pypy setup.py test -q
+#  - C:\pypy3-v5.10.0-win32\pypy3 -m unittest discover pyflakes
+  - C:\pypy-2.6.1-win32\pypy -m unittest discover pyflakes
diff --git a/.travis.yml b/.travis.yml
index 5d92e4f..20e195d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,9 +13,5 @@ matrix:
       dist: xenial
     - python: nightly
       dist: xenial
-install:
-  - pip install --upgrade .
-  - pip list
-script:
-  - python setup.py test -q
-  - if [ "$TRAVIS_PYTHON_VERSION" != "nightly" ]; then pip install flake8==3.6.0 && flake8 --version && flake8 pyflakes setup.py; fi
+install: pip install tox
+script: tox -e py
diff --git a/pyflakes/api.py b/pyflakes/api.py
index 3ddb386..72d6b6c 100644
--- a/pyflakes/api.py
+++ b/pyflakes/api.py
@@ -3,18 +3,17 @@ API for the command-line I{pyflakes} tool.
 """
 from __future__ import with_statement
 
-import sys
+import ast
 import os
 import platform
 import re
-import _ast
+import sys
 
 from pyflakes import checker, __version__
 from pyflakes import reporter as modReporter
 
 __all__ = ['check', 'checkPath', 'checkRecursive', 'iterSourceCode', 'main']
 
-
 PYTHON_SHEBANG_REGEX = re.compile(br'^#!.*\bpython[23w]?\b\s*$')
 
 
@@ -39,7 +38,7 @@ def check(codeString, filename, reporter=None):
         reporter = modReporter._makeDefaultReporter()
     # First, compile into an AST and handle syntax errors.
     try:
-        tree = compile(codeString, filename, "exec", _ast.PyCF_ONLY_AST)
+        tree = ast.parse(codeString, filename=filename)
     except SyntaxError:
         value = sys.exc_info()[1]
         msg = value.args[0]
@@ -71,7 +70,8 @@ def check(codeString, filename, reporter=None):
         reporter.unexpectedError(filename, 'problem decoding source')
         return 1
     # Okay, it's syntactically valid.  Now check it.
-    w = checker.Checker(tree, filename)
+    tokens = checker.make_tokens(codeString)
+    w = checker.Checker(tree, tokens=tokens, filename=filename)
     w.messages.sort(key=lambda m: m.lineno)
     for warning in w.messages:
         reporter.flake(warning)
diff --git a/pyflakes/checker.py b/pyflakes/checker.py
index a090d8a..3c4fa0f 100644
--- a/pyflakes/checker.py
+++ b/pyflakes/checker.py
@@ -6,9 +6,14 @@ Also, it models the Bindings and Scopes.
 """
 import __future__
 import ast
+import bisect
+import collections
 import doctest
+import functools
 import os
+import re
 import sys
+import tokenize
 
 from pyflakes import messages
 
@@ -23,6 +28,10 @@ except AttributeError:
 
 builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))
 
+if PY2:
+    tokenize_tokenize = tokenize.generate_tokens
+else:
+    tokenize_tokenize = tokenize.tokenize
 
 if PY2:
     def getNodeType(node_class):
@@ -63,6 +72,13 @@ else:
     FOR_TYPES = (ast.For,)
     LOOP_TYPES = (ast.While, ast.For)
 
+# https://github.com/python/typed_ast/blob/55420396/ast27/Parser/tokenizer.c#L102-L104
+TYPE_COMMENT_RE = re.compile(r'^#\s*type:\s*')
+# https://github.com/python/typed_ast/blob/55420396/ast27/Parser/tokenizer.c#L1400
+TYPE_IGNORE_RE = re.compile(TYPE_COMMENT_RE.pattern + r'ignore\s*(#|$)')
+# https://github.com/python/typed_ast/blob/55420396/ast27/Grammar/Grammar#L147
+TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')
+
 
 class _FieldsOrder(dict):
     """Fix order of AST node fields."""
@@ -522,6 +538,63 @@ def is_typing_overload(value, scope):
     )
 
 
+def make_tokens(code):
+    # PY3: tokenize.tokenize requires readline of bytes
+    if not isinstance(code, bytes):
+        code = code.encode('UTF-8')
+    lines = iter(code.splitlines(True))
+    # next(lines, b'') is to prevent an error in pypy3
+    return tuple(tokenize_tokenize(lambda: next(lines, b'')))
+
+
+class _TypeableVisitor(ast.NodeVisitor):
+    """Collect the line number and nodes which are deemed typeable by
+    PEP 484
+
+    https://www.python.org/dev/peps/pep-0484/#type-comments
+    """
+    def __init__(self):
+        self.typeable_lines = []  # type: List[int]
+        self.typeable_nodes = {}  # type: Dict[int, ast.AST]
+
+    def _typeable(self, node):
+        # if there is more than one typeable thing on a line last one wins
+        self.typeable_lines.append(node.lineno)
+        self.typeable_nodes[node.lineno] = node
+
+        self.generic_visit(node)
+
+    visit_Assign = visit_For = visit_FunctionDef = visit_With = _typeable
+    visit_AsyncFor = visit_AsyncFunctionDef = visit_AsyncWith = _typeable
+
+
+def _collect_type_comments(tree, tokens):
+    visitor = _TypeableVisitor()
+    visitor.visit(tree)
+
+    type_comments = collections.defaultdict(list)
+    for tp, text, start, _, _ in tokens:
+        if (
+                tp != tokenize.COMMENT or  # skip non comments
+                not TYPE_COMMENT_RE.match(text) or  # skip non-type comments
+                TYPE_IGNORE_RE.match(text)  # skip ignores
+        ):
+            continue
+
+        # search for the typeable node at or before the line number of the
+        # type comment.
+        # if the bisection insertion point is before any nodes this is an
+        # invalid type comment which is ignored.
+        lineno, _ = start
+        idx = bisect.bisect_right(visitor.typeable_lines, lineno)
+        if idx == 0:
+            continue
+        node = visitor.typeable_nodes[visitor.typeable_lines[idx - 1]]
+        type_comments[node].append((start, text))
+
+    return type_comments
+
+
 class Checker(object):
     """
     I check the cleanliness and sanity of Python code.
@@ -556,8 +629,11 @@ class Checker(object):
         builtIns.update(_customBuiltIns.split(','))
     del _customBuiltIns
 
+    # TODO: tokens= is required to perform checks on type comments, eventually
+    #       make this a required positional argument.  For now it is defaulted
+    #       to `()` for api compatibility.
     def __init__(self, tree, filename='(none)', builtins=None,
-                 withDoctest='PYFLAKES_DOCTEST' in os.environ):
+                 withDoctest='PYFLAKES_DOCTEST' in os.environ, tokens=()):
         self._nodeHandlers = {}
         self._deferredFunctions = []
         self._deferredAssignments = []
@@ -573,6 +649,7 @@ class Checker(object):
             raise RuntimeError('No scope implemented for the node %r' % tree)
         self.exceptHandlers = [()]
         self.root = tree
+        self._type_comments = _collect_type_comments(tree, tokens)
         for builtin in self.builtIns:
             self.addBinding(None, Builtin(builtin))
         self.handleChildren(tree)
@@ -952,7 +1029,26 @@ class Checker(object):
             except KeyError:
                 self.report(messages.UndefinedName, node, name)
 
+    def _handle_type_comments(self, node):
+        for (lineno, col_offset), comment in self._type_comments.get(node, ()):
+            comment = comment.split(':', 1)[1].strip()
+            func_match = TYPE_FUNC_RE.match(comment)
+            if func_match:
+                parts = (func_match.group(1), func_match.group(2).strip())
+            else:
+                parts = (comment,)
+
+            for part in parts:
+                if PY2:
+                    part = part.replace('...', 'Ellipsis')
+                self.deferFunction(functools.partial(
+                    self.handleStringAnnotation,
+                    part, node, lineno, col_offset,
+                    messages.CommentAnnotationSyntaxError,
+                ))
+
     def handleChildren(self, tree, omit=None):
+        self._handle_type_comments(tree)
         for node in iter_child_nodes(tree, omit=omit):
             self.handleNode(node, tree)
 
@@ -1040,7 +1136,7 @@ class Checker(object):
         self.addBinding(None, Builtin('_'))
         for example in examples:
             try:
-                tree = compile(example.source, "<doctest>", "exec", ast.PyCF_ONLY_AST)
+                tree = ast.parse(example.source, "<doctest>")
             except SyntaxError:
                 e = sys.exc_info()[1]
                 if PYPY:
@@ -1056,36 +1152,40 @@ class Checker(object):
         self.popScope()
         self.scopeStack = saved_stack
 
-    def handleAnnotation(self, annotation, node):
-        if isinstance(annotation, ast.Str):
-            # Defer handling forward annotation.
-            def handleForwardAnnotation():
-                try:
-                    tree = ast.parse(annotation.s)
-                except SyntaxError:
-                    self.report(
-                        messages.ForwardAnnotationSyntaxError,
-                        node,
-                        annotation.s,
-                    )
-                    return
+    def handleStringAnnotation(self, s, node, ref_lineno, ref_col_offset, err):
+        try:
+            tree = ast.parse(s)
+        except SyntaxError:
+            self.report(err, node, s)
+            return
 
-                body = tree.body
-                if len(body) != 1 or not isinstance(body[0], ast.Expr):
-                    self.report(
-                        messages.ForwardAnnotationSyntaxError,
-                        node,
-                        annotation.s,
-                    )
-                    return
+        body = tree.body
+        if len(body) != 1 or not isinstance(body[0], ast.Expr):
+            self.report(err, node, s)
+            return
 
-                parsed_annotation = tree.body[0].value
-                for descendant in ast.walk(parsed_annotation):
-                    ast.copy_location(descendant, annotation)
+        parsed_annotation = tree.body[0].value
+        for descendant in ast.walk(parsed_annotation):
+            if (
+                    'lineno' in descendant._attributes and
+                    'col_offset' in descendant._attributes
+            ):
+                descendant.lineno = ref_lineno
+                descendant.col_offset = ref_col_offset
 
-                self.handleNode(parsed_annotation, node)
+        self.handleNode(parsed_annotation, node)
 
-            self.deferFunction(handleForwardAnnotation)
+    def handleAnnotation(self, annotation, node):
+        if isinstance(annotation, ast.Str):
+            # Defer handling forward annotation.
+            self.deferFunction(functools.partial(
+                self.handleStringAnnotation,
+                annotation.s,
+                node,
+                annotation.lineno,
+                annotation.col_offset,
+                messages.ForwardAnnotationSyntaxError,
+            ))
         elif self.annotationsFutureEnabled:
             self.deferFunction(lambda: self.handleNode(annotation, node))
         else:
diff --git a/pyflakes/messages.py b/pyflakes/messages.py
index 4756872..4d3c7d7 100644
--- a/pyflakes/messages.py
+++ b/pyflakes/messages.py
@@ -248,6 +248,14 @@ class ForwardAnnotationSyntaxError(Message):
         self.message_args = (annotation,)
 
 
+class CommentAnnotationSyntaxError(Message):
+    message = 'syntax error in type comment %r'
+
+    def __init__(self, filename, loc, annotation):
+        Message.__init__(self, filename, loc)
+        self.message_args = (annotation,)
+
+
 class RaiseNotImplemented(Message):
     message = "'raise NotImplemented' should be 'raise NotImplementedError'"
 
diff --git a/pyflakes/test/harness.py b/pyflakes/test/harness.py
index 0a58bd5..d375ea3 100644
--- a/pyflakes/test/harness.py
+++ b/pyflakes/test/harness.py
@@ -1,4 +1,4 @@
-
+import ast
 import textwrap
 import unittest
 
@@ -8,7 +8,6 @@ __all__ = ['TestCase', 'skip', 'skipIf']
 
 skip = unittest.skip
 skipIf = unittest.skipIf
-PyCF_ONLY_AST = 1024
 
 
 class TestCase(unittest.TestCase):
@@ -16,11 +15,12 @@ class TestCase(unittest.TestCase):
     withDoctest = False
 
     def flakes(self, input, *expectedOutputs, **kw):
-        tree = compile(textwrap.dedent(input), "<test>", "exec", PyCF_ONLY_AST)
+        tree = ast.parse(textwrap.dedent(input))
+        tokens = checker.make_tokens(textwrap.dedent(input))
         if kw.get('is_segment'):
             tree = tree.body[0]
             kw.pop('is_segment')
-        w = checker.Checker(tree, withDoctest=self.withDoctest, **kw)
+        w = checker.Checker(tree, tokens=tokens, withDoctest=self.withDoctest, **kw)
         outputs = [type(o) for o in w.messages]
         expectedOutputs = list(expectedOutputs)
         outputs.sort(key=lambda t: t.__name__)
diff --git a/pyflakes/test/test_checker.py b/pyflakes/test/test_checker.py
new file mode 100644
index 0000000..f47588d
--- /dev/null
+++ b/pyflakes/test/test_checker.py
@@ -0,0 +1,186 @@
+import ast
+import sys
+
+from pyflakes import checker
+from pyflakes.test.harness import TestCase, skipIf
+
+
+class TypeableVisitorTests(TestCase):
+    """
+    Tests of L{_TypeableVisitor}
+    """
+
+    @staticmethod
+    def _run_visitor(s):
+        """
+        Run L{_TypeableVisitor} on the parsed source and return the visitor.
+        """
+        tree = ast.parse(s)
+        visitor = checker._TypeableVisitor()
+        visitor.visit(tree)
+        return visitor
+
+    def test_node_types(self):
+        """
+        Test that the typeable node types are collected
+        """
+        visitor = self._run_visitor(
+            """\
+x = 1  # assignment
+for x in range(1): pass  # for loop
+def f(): pass  # function definition
+with a as b: pass  # with statement
+"""
+        )
+        self.assertEqual(visitor.typeable_lines, [1, 2, 3, 4])
+        self.assertIsInstance(visitor.typeable_nodes[1], ast.Assign)
+        self.assertIsInstance(visitor.typeable_nodes[2], ast.For)
+        self.assertIsInstance(visitor.typeable_nodes[3], ast.FunctionDef)
+        self.assertIsInstance(visitor.typeable_nodes[4], ast.With)
+
+    def test_visitor_recurses(self):
+        """
+        Test the common pitfall of missing `generic_visit` in visitors by
+        ensuring that nested nodes are reported
+        """
+        visitor = self._run_visitor(
+            """\
+def f():
+    x = 1
+"""
+        )
+        self.assertEqual(visitor.typeable_lines, [1, 2])
+        self.assertIsInstance(visitor.typeable_nodes[1], ast.FunctionDef)
+        self.assertIsInstance(visitor.typeable_nodes[2], ast.Assign)
+
+    @skipIf(sys.version_info < (3, 5), 'async syntax introduced in py35')
+    def test_py35_node_types(self):
+        """
+        Test that the PEP 492 node types are collected
+        """
+        visitor = self._run_visitor(
+            """\
+async def f():  # async def
+    async for x in y:  pass  # async for
+    async with a as b: pass  # async with
+"""
+        )
+        self.assertEqual(visitor.typeable_lines, [1, 2, 3])
+        self.assertIsInstance(visitor.typeable_nodes[1], ast.AsyncFunctionDef)
+        self.assertIsInstance(visitor.typeable_nodes[2], ast.AsyncFor)
+        self.assertIsInstance(visitor.typeable_nodes[3], ast.AsyncWith)
+
+    def test_last_node_wins(self):
+        """
+        Test that when two typeable nodes are present on a line, the last
+        typeable one wins.
+        """
+        visitor = self._run_visitor('x = 1; y = 1')
+        # detected both assignable nodes
+        self.assertEqual(visitor.typeable_lines, [1, 1])
+        # but the assignment to `y` wins
+        self.assertEqual(visitor.typeable_nodes[1].targets[0].id, 'y')
+
+
+class CollectTypeCommentsTests(TestCase):
+    """
+    Tests of L{_collect_type_comments}
+    """
+
+    @staticmethod
+    def _collect(s):
+        """
+        Run L{_collect_type_comments} on the parsed source and return the
+        mapping from nodes to comments.  The return value is converted to
+        a set: {(node_type, tuple of comments), ...}
+        """
+        tree = ast.parse(s)
+        tokens = checker.make_tokens(s)
+        ret = checker._collect_type_comments(tree, tokens)
+        return {(type(k), tuple(s for _, s in v)) for k, v in ret.items()}
+
+    def test_bytes(self):
+        """
+        Test that the function works for binary source
+        """
+        ret = self._collect(b'x = 1  # type: int')
+        self.assertSetEqual(ret, {(ast.Assign, ('# type: int',))})
+
+    def test_text(self):
+        """
+        Test that the function works for text source
+        """
+        ret = self._collect(u'x = 1  # type: int')
+        self.assertEqual(ret, {(ast.Assign, ('# type: int',))})
+
+    def test_non_type_comment_ignored(self):
+        """
+        Test that a non-type comment is ignored
+        """
+        ret = self._collect('x = 1  # noqa')
+        self.assertSetEqual(ret, set())
+
+    def test_type_comment_before_typeable(self):
+        """
+        Test that a type comment before something typeable is ignored.
+        """
+        ret = self._collect('# type: int\nx = 1')
+        self.assertSetEqual(ret, set())
+
+    def test_type_ignore_comment_ignored(self):
+        """
+        Test that `# type: ignore` comments are not collected.
+        """
+        ret = self._collect('x = 1  # type: ignore')
+        self.assertSetEqual(ret, set())
+
+    def test_type_ignore_with_other_things_ignored(self):
+        """
+        Test that `# type: ignore` comments with more content are also not
+        collected.
+        """
+        ret = self._collect('x = 1  # type: ignore # noqa')
+        self.assertSetEqual(ret, set())
+        ret = self._collect('x = 1  #type:ignore#noqa')
+        self.assertSetEqual(ret, set())
+
+    def test_type_comment_with_extra_still_collected(self):
+        ret = self._collect('x = 1  # type: int  # noqa')
+        self.assertSetEqual(ret, {(ast.Assign, ('# type: int  # noqa',))})
+
+    def test_type_comment_without_whitespace(self):
+        ret = self._collect('x = 1 #type:int')
+        self.assertSetEqual(ret, {(ast.Assign, ('#type:int',))})
+
+    def test_type_comment_starts_with_word_ignore(self):
+        ret = self._collect('x = 1 # type: ignore[T]')
+        self.assertSetEqual(ret, {(ast.Assign, ('# type: ignore[T]',))})
+
+    def test_last_node_wins(self):
+        """
+        Test that when two typeable nodes are present on a line, the last
+        typeable one wins.
+        """
+        ret = self._collect('def f(): x = 1  # type: int')
+        self.assertSetEqual(ret, {(ast.Assign, ('# type: int',))})
+
+    def test_function_def_assigned_comments(self):
+        """
+        Test that type comments for function arguments are all attributed to
+        the function definition.
+        """
+        ret = self._collect(
+            """\
+def f(
+        a,  # type: int
+        b,  # type: str
+):
+    # type: (...) -> None
+    pass
+"""
+        )
+        expected = {(
+            ast.FunctionDef,
+            ('# type: int', '# type: str', '# type: (...) -> None'),
+        )}
+        self.assertSetEqual(ret, expected)
diff --git a/pyflakes/test/test_other.py b/pyflakes/test/test_other.py
index d0fff0a..f75b3d9 100644
--- a/pyflakes/test/test_other.py
+++ b/pyflakes/test/test_other.py
@@ -2073,6 +2073,72 @@ class TestAsyncStatements(TestCase):
         class B: pass
         ''', m.UndefinedName)
 
+    def test_typeCommentsMarkImportsAsUsed(self):
+        self.flakes("""
+        from mod import A, B, C, D, E, F, G
+
+
+        def f(
+            a,  # type: A
+        ):
+            # type: (...) -> B
+            for b in a:  # type: C
+                with b as c:  # type: D
+                    d = c.x  # type: E
+                    return d
+
+
+        def g(x):  # type: (F) -> G
+            return x.y
+        """)
+
+    def test_typeCommentsFullSignature(self):
+        self.flakes("""
+        from mod import A, B, C, D
+        def f(a, b):
+            # type: (A, B[C]) -> D
+            return a + b
+        """)
+
+    def test_typeCommentsAdditionalComemnt(self):
+        self.flakes("""
+        from mod import F
+
+        x = 1 # type: F  # noqa
+        """)
+
+    def test_typeCommentsNoWhitespaceAnnotation(self):
+        self.flakes("""
+        from mod import F
+
+        x = 1  #type:F
+        """)
+
+    def test_typeCommentsInvalidDoesNotMarkAsUsed(self):
+        self.flakes("""
+        from mod import F
+
+        # type: F
+        """, m.UnusedImport)
+
+    def test_typeCommentsSyntaxError(self):
+        self.flakes("""
+        def f(x):  # type: (F[) -> None
+            pass
+        """, m.CommentAnnotationSyntaxError)
+
+    def test_typeCommentsAssignedToPreviousNode(self):
+        # This test demonstrates an issue in the implementation which
+        # associates the type comment with a node above it, however the type
+        # comment isn't valid according to mypy.  If an improved approach
+        # which can detect these "invalid" type comments is implemented, this
+        # test should be removed / improved to assert that new check.
+        self.flakes("""
+        from mod import F
+        x = 1
+        # type: F
+        """)
+
     def test_raise_notimplemented(self):
         self.flakes('''
         raise NotImplementedError("This is fine")
diff --git a/pyflakes/test/test_undefined_names.py b/pyflakes/test/test_undefined_names.py
index 222ffce..25e28dd 100644
--- a/pyflakes/test/test_undefined_names.py
+++ b/pyflakes/test/test_undefined_names.py
@@ -1,5 +1,4 @@
-
-from _ast import PyCF_ONLY_AST
+import ast
 from sys import version_info
 
 from pyflakes import messages as m, checker
@@ -848,7 +847,8 @@ class NameTests(TestCase):
         A Name node with an unrecognized context results in a RuntimeError being
         raised.
         """
-        tree = compile("x = 10", "<test>", "exec", PyCF_ONLY_AST)
+        tree = ast.parse("x = 10")
+        tokens = checker.make_tokens("x = 10")
         # Make it into something unrecognizable.
         tree.body[0].targets[0].ctx = object()
-        self.assertRaises(RuntimeError, checker.Checker, tree)
+        self.assertRaises(RuntimeError, checker.Checker, tree, tokens=tokens)
diff --git a/tox.ini b/tox.ini
index 3a07d70..5821483 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,11 +6,9 @@ envlist =
 [testenv]
 deps = flake8==3.6.0
 commands =
-    python setup.py test -q
+    python -m unittest discover pyflakes
     flake8 pyflakes setup.py
 
 [flake8]
-select = E,F,W
-ignore = W504
 builtins = unicode
 max_line_length = 89
-- 
cgit v1.2.1