diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2021-06-06 12:40:47 -0400 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2021-06-06 12:42:07 -0400 |
commit | cb09207f6f291696714f5550aacd1e9a3a0e81e1 (patch) | |
tree | 026f8d44180c494e614708a2861326c8ca489407 | |
parent | 95c582fd8038a7158ff96baff4186f5fb601afd4 (diff) | |
download | python-coveragepy-git-cb09207f6f291696714f5550aacd1e9a3a0e81e1.tar.gz |
feat: soft keywords are shown in bold in the HTML report
The match and case soft keywords are shown in bold when they act as
keywords, and in normal weight when they are used as ordinary identifiers.
The underscore soft keyword is ignored, because it is harder to get
right, and because it doesn't look that much different in bold anyway.
-rw-r--r-- | coverage/env.py | 4 | ||||
-rw-r--r-- | coverage/phystokens.py | 39 | ||||
-rw-r--r-- | tests/test_phystokens.py | 36 |
3 files changed, 76 insertions, 3 deletions
diff --git a/coverage/env.py b/coverage/env.py index 89abbb2e..c300f802 100644 --- a/coverage/env.py +++ b/coverage/env.py @@ -105,6 +105,10 @@ class PYBEHAVIOR: # Match-case construct. match_case = (PYVERSION >= (3, 10)) + # Some words are keywords in some places, identifiers in other places. + soft_keywords = (PYVERSION >= (3, 10)) + + # Coverage.py specifics. # Are we using the C-implemented trace function? diff --git a/coverage/phystokens.py b/coverage/phystokens.py index 52c2aa06..f06c0c27 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -3,11 +3,13 @@ """Better tokenizing for coverage.py.""" +import ast import keyword import re import token import tokenize +from coverage import env from coverage.misc import contract @@ -66,6 +68,21 @@ def phys_tokens(toks): last_lineno = elineno +class MatchCaseFinder(ast.NodeVisitor): + """Helper for finding match/case lines.""" + def __init__(self, source): + # This will be the set of line numbers that start match or case statements. + self.match_case_lines = set() + self.visit(ast.parse(source)) + + def visit_Match(self, node): + """Invoked by ast.NodeVisitor.visit""" + self.match_case_lines.add(node.lineno) + for case in node.cases: + self.match_case_lines.add(case.pattern.lineno) + self.generic_visit(node) + + @contract(source='unicode') def source_token_lines(source): """Generate a series of lines, one for each line in `source`. 
@@ -90,7 +107,10 @@ def source_token_lines(source): source = source.expandtabs(8).replace('\r\n', '\n') tokgen = generate_tokens(source) - for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen): + if env.PYBEHAVIOR.soft_keywords: + match_case_lines = MatchCaseFinder(source).match_case_lines + + for ttype, ttext, (sline, scol), (_, ecol), _ in phys_tokens(tokgen): mark_start = True for part in re.split('(\n)', ttext): if part == '\n': @@ -107,8 +127,21 @@ def source_token_lines(source): line.append(("ws", " " * (scol - col))) mark_start = False tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3] - if ttype == token.NAME and keyword.iskeyword(ttext): - tok_class = "key" + if ttype == token.NAME: + if keyword.iskeyword(ttext): + # Hard keywords are always keywords. + tok_class = "key" + elif env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext): + # Soft keywords appear at the start of the line, on lines that start + # match or case statements. + if len(line) == 0: + is_start_of_line = True + elif (len(line) == 1) and line[0][0] == "ws": + is_start_of_line = True + else: + is_start_of_line = False + if is_start_of_line and sline in match_case_lines: + tok_class = "key" line.append((tok_class, part)) mark_end = True scol = 0 diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py index 82b887e6..3c214c63 100644 --- a/tests/test_phystokens.py +++ b/tests/test_phystokens.py @@ -103,6 +103,42 @@ class PhysTokensTest(CoverageTest): self.check_file_tokenization(stress) +@pytest.mark.skipif(not env.PYBEHAVIOR.soft_keywords, reason="Soft keywords are new in Python 3.10") +class SoftKeywordTest(CoverageTest): + """Tests the tokenizer handling soft keywords.""" + + run_in_temp_dir = False + + def test_soft_keywords(self): + source = textwrap.dedent("""\ + match re.match(something): + case ["what"]: + match = case("hello") + case [_]: + match("hello") + match another.thing: + case 1: + pass + + class case(): pass + def match(): + global case + 
""") + tokens = list(source_token_lines(source)) + assert tokens[0][0] == ("key", "match") + assert tokens[0][4] == ("nam", "match") + assert tokens[1][1] == ("key", "case") + assert tokens[2][1] == ("nam", "match") + assert tokens[2][5] == ("nam", "case") + assert tokens[3][1] == ("key", "case") + assert tokens[4][1] == ("nam", "match") + assert tokens[5][1] == ("key", "match") + assert tokens[6][1] == ("key", "case") + assert tokens[9][2] == ("nam", "case") + assert tokens[10][2] == ("nam", "match") + assert tokens[11][3] == ("nam", "case") + + # The default encoding is different in Python 2 and Python 3. DEF_ENCODING = "utf-8" |