-rw-r--r--  coverage/html.py           121
-rw-r--r--  coverage/phystokens.py     101
-rw-r--r--  test/stress_phystoken.txt   35
-rw-r--r--  test/test_phystokens.py     58
4 files changed, 229 insertions, 86 deletions
diff --git a/coverage/html.py b/coverage/html.py
index 23fedd62..37c754bd 100644
--- a/coverage/html.py
+++ b/coverage/html.py
@@ -1,9 +1,9 @@
"""HTML reporting for Coverage."""
-import keyword, os, re, token, tokenize, shutil
+import os, re, shutil
from coverage import __url__, __version__ # pylint: disable-msg=W0611
-from coverage.backward import StringIO # pylint: disable-msg=W0622
+from coverage.phystokens import source_token_lines
from coverage.report import Reporter
from coverage.templite import Templite
@@ -20,33 +20,6 @@ def data(fname):
return open(data_filename(fname)).read()
-def phys_tokens(toks):
- """Return all physical tokens, even line continuations.
-
- tokenize.generate_tokens() doesn't return a token for the backslash that
- continues lines. This wrapper provides those tokens so that we can
- re-create a faithful representation of the original source.
-
- Returns the same values as generate_tokens()
-
- """
- last_line = None
- last_lineno = -1
- for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
- if last_lineno != elineno:
- if last_line and last_line[-2:] == "\\\n":
- if ttype != token.STRING:
- ccol = len(last_line.split("\n")[-2]) - 1
- yield (
- 99999, "\\\n",
- (slineno, ccol), (slineno, ccol+2),
- last_line
- )
- last_line = ltext
- yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
- last_lineno = elineno
-
-
class HtmlReporter(Reporter):
"""HTML reporting."""
@@ -88,7 +61,6 @@ class HtmlReporter(Reporter):
"""Generate an HTML file for one source file."""
source = cu.source_file().read().expandtabs(4)
- source_lines = source.split("\n")
nums = analysis.numbers
@@ -102,63 +74,41 @@ class HtmlReporter(Reporter):
c_mis = " mis"
c_par = " par"
- ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
lines = []
- line = []
- lineno = 1
- col = 0
- tokgen = tokenize.generate_tokens(StringIO(source).readline)
- for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
- mark_start = True
- for part in re.split('(\n)', ttext):
- if part == '\n':
-
- line_class = ""
- annotate = ""
- if lineno in analysis.statements:
- line_class += " stm"
- if lineno in analysis.excluded:
- line_class += c_exc
- elif lineno in analysis.missing:
- line_class += c_mis
- elif self.arcs and lineno in missing_branch_arcs:
- line_class += c_par
- n_par += 1
- annotate = " ".join(map(str, missing_branch_arcs[lineno]))
- elif lineno in analysis.statements:
- line_class += c_run
-
- lineinfo = {
- 'html': "".join(line),
- 'number': lineno,
- 'class': line_class.strip() or "pln",
- 'annotate': annotate,
- }
- lines.append(lineinfo)
-
- line = []
- lineno += 1
- col = 0
- mark_end = False
- elif part == '':
- mark_end = False
- elif ttype in ws_tokens:
- mark_end = False
+
+ for lineno, line in enumerate(source_token_lines(source)):
+ lineno += 1 # 1-based line numbers.
+ # Figure out how to mark this line.
+ line_class = ""
+ annotate = ""
+ if lineno in analysis.statements:
+ line_class += " stm"
+ if lineno in analysis.excluded:
+ line_class += c_exc
+ elif lineno in analysis.missing:
+ line_class += c_mis
+ elif self.arcs and lineno in missing_branch_arcs:
+ line_class += c_par
+ n_par += 1
+ annotate = " ".join(map(str, missing_branch_arcs[lineno]))
+ elif lineno in analysis.statements:
+ line_class += c_run
+
+ # Build the HTML for the line
+ html = ""
+ for tok_type, tok_text in line:
+ if tok_type == "ws":
+ html += escape(tok_text)
else:
- if mark_start and scol > col:
- line.append(escape(" " * (scol - col)))
- mark_start = False
- tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
- if ttype == token.NAME and keyword.iskeyword(ttext):
- tok_class = "key"
- tok_html = escape(part) or '&nbsp;'
- line.append(
- "<span class='%s'>%s</span>" % (tok_class, tok_html)
- )
- mark_end = True
- scol = 0
- if mark_end:
- col = ecol
+ tok_html = escape(tok_text) or '&nbsp;'
+ html += "<span class='%s'>%s</span>" % (tok_type, tok_html)
+
+ lines.append({
+ 'html': html,
+ 'number': lineno,
+ 'class': line_class.strip() or "pln",
+ 'annotate': annotate,
+ })
# Write the HTML page for this file.
html_filename = cu.flat_rootname() + ".html"
@@ -184,7 +134,6 @@ class HtmlReporter(Reporter):
arcs = self.arcs
totals = sum([f['nums'] for f in files])
- total_par = sum([f['par'] for f in files])
fhtml = open(os.path.join(self.directory, "index.html"), "w")
fhtml.write(index_tmpl.render(locals()))
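
For orientation, here is a minimal sketch (not part of this commit) of what the new rendering loop in html_file() does with one tokenized line. The escape() helper is assumed to be html.py's existing HTML-escaping function, and the sample line below is hypothetical:

    # One "line" from source_token_lines() is a list of (class, text) pairs.
    line = [('key', 'def'), ('ws', ' '), ('nam', 'foo'),
            ('op', '('), ('op', ')'), ('op', ':')]

    def escape(text):
        # Stand-in for html.py's escape(); assumed to HTML-escape the text.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    html = ""
    for tok_type, tok_text in line:
        if tok_type == "ws":
            html += escape(tok_text)
        else:
            tok_html = escape(tok_text) or '&nbsp;'
            html += "<span class='%s'>%s</span>" % (tok_type, tok_html)

    # html is now (wrapped here for readability):
    #   <span class='key'>def</span> <span class='nam'>foo</span>
    #   <span class='op'>(</span><span class='op'>)</span><span class='op'>:</span>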
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
new file mode 100644
index 00000000..7eebb8ad
--- /dev/null
+++ b/coverage/phystokens.py
@@ -0,0 +1,101 @@
+"""Better tokenizing for coverage.py."""
+
+import keyword, re, token, tokenize
+from coverage.backward import StringIO # pylint: disable-msg=W0622
+
+def phys_tokens(toks):
+ """Return all physical tokens, even line continuations.
+
+ tokenize.generate_tokens() doesn't return a token for the backslash that
+ continues lines. This wrapper provides those tokens so that we can
+ re-create a faithful representation of the original source.
+
+ Returns the same values as generate_tokens()
+
+ """
+ last_line = None
+ last_lineno = -1
+ for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
+ if last_lineno != elineno:
+ if last_line and last_line[-2:] == "\\\n":
+ # We are at the beginning of a new line, and the last line
+ # ended with a backslash. We probably have to inject a
+ # backslash token into the stream. Unfortunately, there's more
+ # to figure out. This code::
+ #
+ # usage = """\
+ # HEY THERE
+ # """
+ #
+ # triggers this condition, but the token text is::
+ #
+ # '"""\\\nHEY THERE\n"""'
+ #
+ # so we need to figure out if the backslash is already in the
+ # string token or not.
+ inject_backslash = True
+ if ttype == token.STRING:
+ if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
+ # It's a multiline string and the first line ends with
+ # a backslash, so we don't need to inject another.
+ inject_backslash = False
+ if inject_backslash:
+ # Figure out what column the backslash is in.
+ ccol = len(last_line.split("\n")[-2]) - 1
+ # Yield the token, with a fake token type.
+ yield (
+ 99999, "\\\n",
+ (slineno, ccol), (slineno, ccol+2),
+ last_line
+ )
+ last_line = ltext
+ yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
+ last_lineno = elineno
+
+
+def source_token_lines(source):
+ """Generate a series of lines, one for each line in `source`.
+
+ Each line is a list of pairs, each pair is a token::
+
+ [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
+
+ Each pair has a token class, and the token text.
+
+ If you concatenate all the token texts, and then join them with newlines,
+ you should have your original `source` back, with two differences:
+ trailing whitespace is not preserved, and a final line with no newline
+ is indistinguishable from a final line with a newline.
+
+ """
+ ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
+ line = []
+ col = 0
+ tokgen = tokenize.generate_tokens(StringIO(source).readline)
+ for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
+ mark_start = True
+ for part in re.split('(\n)', ttext):
+ if part == '\n':
+ yield line
+ line = []
+ col = 0
+ mark_end = False
+ elif part == '':
+ mark_end = False
+ elif ttype in ws_tokens:
+ mark_end = False
+ else:
+ if mark_start and scol > col:
+ line.append(("ws", " " * (scol - col)))
+ mark_start = False
+ tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
+ if ttype == token.NAME and keyword.iskeyword(ttext):
+ tok_class = "key"
+ line.append((tok_class, part))
+ mark_end = True
+ scol = 0
+ if mark_end:
+ col = ecol
+
+ if line:
+ yield line
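
As a quick sanity check of phys_tokens() (a sketch, not part of this commit; io.StringIO is used here for brevity instead of coverage.backward.StringIO), a backslash-continued line produces the injected fake token that the stdlib tokenizer omits:

    import io, tokenize
    from coverage.phystokens import phys_tokens

    src = 'total = 1 + \\\n    2\n'
    toks = tokenize.generate_tokens(io.StringIO(src).readline)
    texts = [ttext for _, ttext, _, _, _ in phys_tokens(toks)]

    # The continuation token with fake type 99999 shows up as "\\\n";
    # tokenize.generate_tokens() alone never yields it.
    assert "\\\n" in texts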
diff --git a/test/stress_phystoken.txt b/test/stress_phystoken.txt
new file mode 100644
index 00000000..bd6a453a
--- /dev/null
+++ b/test/stress_phystoken.txt
@@ -0,0 +1,35 @@
+# Here's some random Python so that test_tokenize_myself will have some
+# stressful stuff to try. This file is .txt instead of .py so pylint won't
+# complain about it.
+
+first_back = """\
+hey there!
+"""
+
+other_back = """
+hey \
+there
+"""
+
+lots_of_back = """\
+hey \
+there
+"""
+fake_back = """\
+ouch
+"""
+
+class C(object):
+ def there():
+ this = 5 + \
+ 7
+ that = \
+ "a continued line"
+
+cont1 = "one line of text" + \
+ "another line of text"
+
+def hello():
+ print("Hello world!")
+
+hello()
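
The fake_back case above is exactly the situation the inject_backslash check in phys_tokens() guards against: the backslash already lives inside the STRING token, so no extra continuation token should be added. A small check with the stdlib tokenizer (again only a sketch, not part of this commit):

    import io, token, tokenize

    src = 'fake_back = """\\\nouch\n"""\n'
    toks = tokenize.generate_tokens(io.StringIO(src).readline)
    string_text = next(ttext for ttype, ttext, _, _, _ in toks
                       if ttype == token.STRING)

    print(repr(string_text))    # '"""\\\nouch\n"""'
    # The first physical line of the token text ends with the backslash,
    # which is the condition phys_tokens() tests before deciding not to inject.
    assert string_text.split('\n', 1)[0][-1] == '\\'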
diff --git a/test/test_phystokens.py b/test/test_phystokens.py
new file mode 100644
index 00000000..03f2a929
--- /dev/null
+++ b/test/test_phystokens.py
@@ -0,0 +1,58 @@
+"""Tests for Coverage.py's improved tokenizer."""
+
+import os, re, sys
+
+sys.path.insert(0, os.path.split(__file__)[0]) # Force relative import for Py3k
+from coveragetest import CoverageTest
+
+from coverage.phystokens import source_token_lines
+
+
+SIMPLE = """\
+# yay!
+def foo():
+ say('two = %d' % 2)
+"""
+
+HERE = os.path.split(__file__)[0]
+
+class PhysTokensTest(CoverageTest):
+ """Tests for Coverage.py's improver tokenizer."""
+
+ def check_tokenization(self, source):
+ """Tokenize `source`, then put it back together, should be the same."""
+ tokenized = ""
+ for line in source_token_lines(source):
+ text = "".join([t for _,t in line])
+ tokenized += text + "\n"
+ source = re.sub("(?m)[ \t]+$", "", source)
+ tokenized = re.sub("(?m)[ \t]+$", "", tokenized)
+ #if source != tokenized:
+ # open(r"c:\foo\0.py", "w").write(source)
+ # open(r"c:\foo\1.py", "w").write(tokenized)
+ self.assertEqual(source, tokenized)
+
+ def check_file_tokenization(self, fname):
+ """Use the contents of `fname` for `check_tokenization`."""
+ self.check_tokenization(open(fname).read())
+
+ def test_simple(self):
+ self.assertEqual(list(source_token_lines(SIMPLE)),
+ [
+ [('com', "# yay!")],
+ [('key', 'def'), ('ws', ' '), ('nam', 'foo'), ('op', '('),
+ ('op', ')'), ('op', ':')],
+ [('ws', ' '), ('nam', 'say'), ('op', '('),
+ ('str', "'two = %d'"), ('ws', ' '), ('op', '%'),
+ ('ws', ' '), ('num', '2'), ('op', ')')]
+ ]
+ )
+ self.check_tokenization(SIMPLE)
+
+ def test_tokenize_real_file(self):
+ real_file = os.path.join(HERE, "test_coverage.py")
+ self.check_file_tokenization(real_file)
+
+ def test_stress(self):
+ stress = os.path.join(HERE, "stress_phystoken.txt")
+ self.check_file_tokenization(stress)
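
The round-trip property that check_tokenization() asserts can also be seen in isolation. This is only a sketch of the behavior documented in source_token_lines(): concatenating the token texts and joining the lines with newlines reproduces the source, modulo trailing whitespace and the final newline:

    from coverage.phystokens import source_token_lines

    src = "def hello():\n    print('Hello world!')\n"
    lines = list(source_token_lines(src))

    rebuilt = "\n".join("".join(text for _, text in line) for line in lines) + "\n"
    assert rebuilt == src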