author     Ned Batchelder <ned@nedbatchelder.com>   2009-11-15 11:12:12 -0500
committer  Ned Batchelder <ned@nedbatchelder.com>   2009-11-15 11:12:12 -0500
commit     7b49eec72eef2f3c8fda53fc8f3893759257bdf1 (patch)
tree       e5efb7eab6a3765cbcb75bd18e16c7a376cb2312
parent     4316cdbd6b9bc05dddf842d9986560bf208a6f28 (diff)
download   python-coveragepy-git-7b49eec72eef2f3c8fda53fc8f3893759257bdf1.tar.gz
Fix a problem with syntax coloring continued lines, and refactor for testability, and add tests. Fixes issue #30.
-rw-r--r--   coverage/html.py            121
-rw-r--r--   coverage/phystokens.py      101
-rw-r--r--   test/stress_phystoken.txt    35
-rw-r--r--   test/test_phystokens.py      58
4 files changed, 229 insertions, 86 deletions
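For context before the diff: `tokenize.generate_tokens()` emits no token for a backslash line continuation, so a colorizer that rebuilds each physical line from tokens silently drops the backslash. A minimal sketch of that stdlib behavior (the `SOURCE` snippet is illustrative, not taken from the repository):

```python
# Demonstrate that no token covers a backslash continuation.
# Assumes Python 3 for io.StringIO; coverage.py 3.x used
# coverage.backward.StringIO for 2.x/3.x compatibility.
import tokenize
from io import StringIO

SOURCE = "total = 1 + \\\n    2\n"

for tok in tokenize.generate_tokens(StringIO(SOURCE).readline):
    print(tok)
# NAME 'total', OP '=', NUMBER '1', OP '+', NUMBER '2', NEWLINE ...
# Nothing covers the trailing "\", which is why the new phys_tokens()
# below injects a fake token (type 99999) for it.
```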
diff --git a/coverage/html.py b/coverage/html.py
index 23fedd62..37c754bd 100644
--- a/coverage/html.py
+++ b/coverage/html.py
@@ -1,9 +1,9 @@
 """HTML reporting for Coverage."""
 
-import keyword, os, re, token, tokenize, shutil
+import os, re, shutil
 
 from coverage import __url__, __version__   # pylint: disable-msg=W0611
-from coverage.backward import StringIO      # pylint: disable-msg=W0622
+from coverage.phystokens import source_token_lines
 from coverage.report import Reporter
 from coverage.templite import Templite
 
@@ -20,33 +20,6 @@ def data(fname):
     return open(data_filename(fname)).read()
 
 
-def phys_tokens(toks):
-    """Return all physical tokens, even line continuations.
-
-    tokenize.generate_tokens() doesn't return a token for the backslash that
-    continues lines.  This wrapper provides those tokens so that we can
-    re-create a faithful representation of the original source.
-
-    Returns the same values as generate_tokens()
-
-    """
-    last_line = None
-    last_lineno = -1
-    for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
-        if last_lineno != elineno:
-            if last_line and last_line[-2:] == "\\\n":
-                if ttype != token.STRING:
-                    ccol = len(last_line.split("\n")[-2]) - 1
-                    yield (
-                        99999, "\\\n",
-                        (slineno, ccol), (slineno, ccol+2),
-                        last_line
-                        )
-            last_line = ltext
-        yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
-        last_lineno = elineno
-
-
 class HtmlReporter(Reporter):
     """HTML reporting."""
 
@@ -88,7 +61,6 @@ class HtmlReporter(Reporter):
         """Generate an HTML file for one source file."""
 
         source = cu.source_file().read().expandtabs(4)
-        source_lines = source.split("\n")
 
         nums = analysis.numbers
 
@@ -102,63 +74,41 @@
             c_mis = " mis"
             c_par = " par"
 
-        ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
         lines = []
-        line = []
-        lineno = 1
-        col = 0
-        tokgen = tokenize.generate_tokens(StringIO(source).readline)
-        for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
-            mark_start = True
-            for part in re.split('(\n)', ttext):
-                if part == '\n':
-
-                    line_class = ""
-                    annotate = ""
-                    if lineno in analysis.statements:
-                        line_class += " stm"
-                    if lineno in analysis.excluded:
-                        line_class += c_exc
-                    elif lineno in analysis.missing:
-                        line_class += c_mis
-                    elif self.arcs and lineno in missing_branch_arcs:
-                        line_class += c_par
-                        n_par += 1
-                        annotate = " ".join(map(str, missing_branch_arcs[lineno]))
-                    elif lineno in analysis.statements:
-                        line_class += c_run
-
-                    lineinfo = {
-                        'html': "".join(line),
-                        'number': lineno,
-                        'class': line_class.strip() or "pln",
-                        'annotate': annotate,
-                    }
-                    lines.append(lineinfo)
-
-                    line = []
-                    lineno += 1
-                    col = 0
-                    mark_end = False
-                elif part == '':
-                    mark_end = False
-                elif ttype in ws_tokens:
-                    mark_end = False
+
+        for lineno, line in enumerate(source_token_lines(source)):
+            lineno += 1     # 1-based line numbers.
+            # Figure out how to mark this line.
+            line_class = ""
+            annotate = ""
+            if lineno in analysis.statements:
+                line_class += " stm"
+            if lineno in analysis.excluded:
+                line_class += c_exc
+            elif lineno in analysis.missing:
+                line_class += c_mis
+            elif self.arcs and lineno in missing_branch_arcs:
+                line_class += c_par
+                n_par += 1
+                annotate = " ".join(map(str, missing_branch_arcs[lineno]))
+            elif lineno in analysis.statements:
+                line_class += c_run
+
+            # Build the HTML for the line
+            html = ""
+            for tok_type, tok_text in line:
+                if tok_type == "ws":
+                    html += escape(tok_text)
                 else:
-                    if mark_start and scol > col:
-                        line.append(escape(" " * (scol - col)))
-                        mark_start = False
-                    tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
-                    if ttype == token.NAME and keyword.iskeyword(ttext):
-                        tok_class = "key"
-                    tok_html = escape(part) or '&nbsp;'
-                    line.append(
-                        "<span class='%s'>%s</span>" % (tok_class, tok_html)
-                        )
-                    mark_end = True
-            scol = 0
-            if mark_end:
-                col = ecol
+                    tok_html = escape(tok_text) or '&nbsp;'
+                    html += "<span class='%s'>%s</span>" % (tok_type, tok_html)
+
+            lines.append({
+                'html': html,
+                'number': lineno,
+                'class': line_class.strip() or "pln",
+                'annotate': annotate,
+            })
 
         # Write the HTML page for this file.
         html_filename = cu.flat_rootname() + ".html"
@@ -184,7 +134,6 @@ class HtmlReporter(Reporter):
         arcs = self.arcs
 
         totals = sum([f['nums'] for f in files])
-        total_par = sum([f['par'] for f in files])
 
         fhtml = open(os.path.join(self.directory, "index.html"), "w")
         fhtml.write(index_tmpl.render(locals()))
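With the tokenizing moved into `coverage/phystokens.py`, the loop in `html_file()` reduces to consuming `(token_class, token_text)` pairs. A standalone sketch of the same rendering idea, using `html.escape` as a stand-in for the `escape()` helper defined elsewhere in coverage's `html.py`:

```python
from html import escape  # stand-in for the escape() helper in coverage's html.py
from coverage.phystokens import source_token_lines

def render(source):
    """Yield one HTML string per source line, mirroring the new loop above."""
    for line in source_token_lines(source):
        html = ""
        for tok_type, tok_text in line:
            if tok_type == "ws":
                html += escape(tok_text)   # plain whitespace needs no span
            else:
                tok_html = escape(tok_text) or '&nbsp;'
                html += "<span class='%s'>%s</span>" % (tok_type, tok_html)
        yield html

for num, html in enumerate(render("def foo():\n    return 1\n"), start=1):
    print(num, html)
```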
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
new file mode 100644
index 00000000..7eebb8ad
--- /dev/null
+++ b/coverage/phystokens.py
@@ -0,0 +1,101 @@
+"""Better tokenizing for coverage.py."""
+
+import keyword, re, token, tokenize
+from coverage.backward import StringIO # pylint: disable-msg=W0622
+
+def phys_tokens(toks):
+    """Return all physical tokens, even line continuations.
+
+    tokenize.generate_tokens() doesn't return a token for the backslash that
+    continues lines.  This wrapper provides those tokens so that we can
+    re-create a faithful representation of the original source.
+
+    Returns the same values as generate_tokens()
+
+    """
+    last_line = None
+    last_lineno = -1
+    for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
+        if last_lineno != elineno:
+            if last_line and last_line[-2:] == "\\\n":
+                # We are at the beginning of a new line, and the last line
+                # ended with a backslash.  We probably have to inject a
+                # backslash token into the stream.  Unfortunately, there's more
+                # to figure out.  This code::
+                #
+                #   usage = """\
+                #       HEY THERE
+                #       """
+                #
+                # triggers this condition, but the token text is::
+                #
+                #   '"""\\\nHEY THERE\n"""'
+                #
+                # so we need to figure out if the backslash is already in the
+                # string token or not.
+                inject_backslash = True
+                if ttype == token.STRING:
+                    if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
+                        # It's a multiline string and the first line ends with
+                        # a backslash, so we don't need to inject another.
+                        inject_backslash = False
+                if inject_backslash:
+                    # Figure out what column the backslash is in.
+                    ccol = len(last_line.split("\n")[-2]) - 1
+                    # Yield the token, with a fake token type.
+                    yield (
+                        99999, "\\\n",
+                        (slineno, ccol), (slineno, ccol+2),
+                        last_line
+                        )
+            last_line = ltext
+        yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
+        last_lineno = elineno
+
+
+def source_token_lines(source):
+    """Generate a series of lines, one for each line in `source`.
+
+    Each line is a list of pairs, each pair is a token::
+
+        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
+
+    Each pair has a token class, and the token text.
+
+    If you concatenate all the token texts, and then join them with newlines,
+    you should have your original `source` back, with two differences:
+    trailing whitespace is not preserved, and a final line with no newline
+    is indistinguishable from a final line with a newline.
+
+    """
+    ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
+    line = []
+    col = 0
+    tokgen = tokenize.generate_tokens(StringIO(source).readline)
+    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
+        mark_start = True
+        for part in re.split('(\n)', ttext):
+            if part == '\n':
+                yield line
+                line = []
+                col = 0
+                mark_end = False
+            elif part == '':
+                mark_end = False
+            elif ttype in ws_tokens:
+                mark_end = False
+            else:
+                if mark_start and scol > col:
+                    line.append(("ws", " " * (scol - col)))
+                    mark_start = False
+                tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
+                if ttype == token.NAME and keyword.iskeyword(ttext):
+                    tok_class = "key"
+                line.append((tok_class, part))
+                mark_end = True
+            scol = 0
+        if mark_end:
+            col = ecol
+
+    if line:
+        yield line
diff --git a/test/stress_phystoken.txt b/test/stress_phystoken.txt
new file mode 100644
index 00000000..bd6a453a
--- /dev/null
+++ b/test/stress_phystoken.txt
@@ -0,0 +1,35 @@
+# Here's some random Python so that test_tokenize_myself will have some
+# stressful stuff to try.  This file is .txt instead of .py so pylint won't
+# complain about it.
+
+first_back = """\
+hey there!
+"""
+
+other_back = """
+hey \
+there
+"""
+
+lots_of_back = """\
+hey \
+there
+"""
+fake_back = """\
+ouch
+"""
+
+class C(object):
+    def there():
+        this = 5 + \
+            7
+        that = \
+            "a continued line"
+
+cont1 = "one line of text" + \
+    "another line of text"
+
+def hello():
+    print("Hello world!")
+
+hello()
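The `first_back` and `fake_back` cases above target the subtlest branch in `phys_tokens()`: when a multi-line string opens with `"""\`, the backslash is already inside the STRING token, and injecting another would duplicate it. A quick check of that token text, assuming a Python 3 session:

```python
import token, tokenize
from io import StringIO

src = 'usage = """\\\nHEY THERE\n"""\n'
for tok in tokenize.generate_tokens(StringIO(src).readline):
    if tok.type == token.STRING:
        print(repr(tok.string))
# Prints '"""\\\nHEY THERE\n"""' -- the backslash-newline is already part
# of the token text, so phys_tokens() sets inject_backslash = False here.
```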
diff --git a/test/test_phystokens.py b/test/test_phystokens.py
new file mode 100644
index 00000000..03f2a929
--- /dev/null
+++ b/test/test_phystokens.py
@@ -0,0 +1,58 @@
+"""Tests for Coverage.py's improved tokenizer."""
+
+import os, re, sys
+
+sys.path.insert(0, os.path.split(__file__)[0]) # Force relative import for Py3k
+from coveragetest import CoverageTest
+
+from coverage.phystokens import source_token_lines
+
+
+SIMPLE = """\
+# yay!
+def foo():
+    say('two = %d' % 2)
+"""
+
+HERE = os.path.split(__file__)[0]
+
+class PhysTokensTest(CoverageTest):
+    """Tests for Coverage.py's improved tokenizer."""
+
+    def check_tokenization(self, source):
+        """Tokenize `source`, then put it back together, should be the same."""
+        tokenized = ""
+        for line in source_token_lines(source):
+            text = "".join([t for _,t in line])
+            tokenized += text + "\n"
+        source = re.sub("(?m)[ \t]+$", "", source)
+        tokenized = re.sub("(?m)[ \t]+$", "", tokenized)
+        #if source != tokenized:
+        #    open(r"c:\foo\0.py", "w").write(source)
+        #    open(r"c:\foo\1.py", "w").write(tokenized)
+        self.assertEqual(source, tokenized)
+
+    def check_file_tokenization(self, fname):
+        """Use the contents of `fname` for `check_tokenization`."""
+        self.check_tokenization(open(fname).read())
+
+    def test_simple(self):
+        self.assertEqual(list(source_token_lines(SIMPLE)),
+            [
+                [('com', "# yay!")],
+                [('key', 'def'), ('ws', ' '), ('nam', 'foo'), ('op', '('),
+                            ('op', ')'), ('op', ':')],
+                [('ws', '    '), ('nam', 'say'), ('op', '('),
+                            ('str', "'two = %d'"), ('ws', ' '), ('op', '%'),
+                            ('ws', ' '), ('num', '2'), ('op', ')')]
+            ]
+            )
+        self.check_tokenization(SIMPLE)
+
+    def test_tokenize_real_file(self):
+        real_file = os.path.join(HERE, "test_coverage.py")
+        self.check_file_tokenization(real_file)
+
+    def test_stress(self):
+        stress = os.path.join(HERE, "stress_phystoken.txt")
+        self.check_file_tokenization(stress)
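The round trip that `check_tokenization()` asserts can also be tried by hand; a small sketch, assuming coverage.py is importable:

```python
from coverage.phystokens import source_token_lines

src = "a = 1 + \\\n    2\n"
# Rebuild the source by concatenating token texts and rejoining lines.
rebuilt = "\n".join(
    "".join(text for _, text in line)
    for line in source_token_lines(src)
) + "\n"
assert rebuilt == src   # equal up to trailing whitespace, per the docstring
```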