diff options
author | Ned Batchelder <nedbat@gmail.com> | 2015-08-01 12:53:22 -0400 |
---|---|---|
committer | Ned Batchelder <nedbat@gmail.com> | 2015-08-01 12:53:22 -0400 |
commit | 162bab174bf05323e75e247411b8c86e49420415 (patch) | |
tree | 94f28318f3267dc16a7b8bb1f597d8fb52033bee /coverage/phystokens.py | |
parent | 78a0ad5a6b4668dc9f1807d7bfb431d263b7b071 (diff) | |
parent | 9559181fa49011bc94e51c967010e2cb49714d15 (diff) | |
download | python-coveragepy-162bab174bf05323e75e247411b8c86e49420415.tar.gz |
Merged in traff/coverage.py (pull request #50)
Look for __main__ module if coverage is being run for directory #252
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r-- | coverage/phystokens.py | 66 |
1 file changed, 51 insertions, 15 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py index ed6bd23..92da8d3 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -1,3 +1,6 @@ +# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 +# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt + """Better tokenizing for coverage.py.""" import codecs @@ -8,6 +11,7 @@ import tokenize from coverage import env from coverage.backward import iternext +from coverage.misc import contract def phys_tokens(toks): @@ -66,6 +70,7 @@ def phys_tokens(toks): last_lineno = elineno +@contract(source='unicode') def source_token_lines(source): """Generate a series of lines, one for each line in `source`. @@ -134,11 +139,10 @@ class CachedTokenizer(object): self.last_text = None self.last_tokens = None + @contract(text='unicode') def generate_tokens(self, text): """A stand-in for `tokenize.generate_tokens`.""" - # Check the type first so we don't compare bytes to unicode and get - # warnings. - if type(text) != type(self.last_text) or text != self.last_text: + if text != self.last_text: self.last_text = text readline = iternext(text.splitlines(True)) self.last_tokens = list(tokenize.generate_tokens(readline)) @@ -148,14 +152,15 @@ class CachedTokenizer(object): generate_tokens = CachedTokenizer().generate_tokens +COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE) + +@contract(source='bytes') def _source_encoding_py2(source): """Determine the encoding for `source`, according to PEP 263. - Arguments: - source (byte string): the text of the program. + `source` is a byte string, the text of the program. - Returns: - string: the name of the encoding. + Returns a string, the name of the encoding. """ assert isinstance(source, bytes) @@ -165,8 +170,6 @@ def _source_encoding_py2(source): # This is mostly code adapted from Py3.2's tokenize module. 
- cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)") - def _get_normal_name(orig_enc): """Imitates get_normal_name in tokenizer.c.""" # Only care about the first 12 characters. @@ -204,7 +207,7 @@ def _source_encoding_py2(source): except UnicodeDecodeError: return None - matches = cookie_re.findall(line_string) + matches = COOKIE_RE.findall(line_string) if not matches: return None encoding = _get_normal_name(matches[0]) @@ -246,17 +249,15 @@ def _source_encoding_py2(source): return default +@contract(source='bytes') def _source_encoding_py3(source): """Determine the encoding for `source`, according to PEP 263. - Arguments: - source (byte string): the text of the program. + `source` is a byte string: the text of the program. - Returns: - string: the name of the encoding. + Returns a string, the name of the encoding. """ - assert isinstance(source, bytes) readline = iternext(source.splitlines(True)) return tokenize.detect_encoding(readline)[0] @@ -265,3 +266,38 @@ if env.PY3: source_encoding = _source_encoding_py3 else: source_encoding = _source_encoding_py2 + + +@contract(source='unicode') +def compile_unicode(source, filename, mode): + """Just like the `compile` builtin, but works on any Unicode string. + + Python 2's compile() builtin has a stupid restriction: if the source string + is Unicode, then it may not have a encoding declaration in it. Why not? + Who knows! + + This function catches that exception, neuters the coding declaration, and + compiles it anyway. + + """ + try: + code = compile(source, filename, mode) + except SyntaxError as synerr: + if "coding declaration in unicode string" not in synerr.args[0].lower(): + raise + source = neuter_encoding_declaration(source) + code = compile(source, filename, mode) + + return code + + +@contract(source='unicode', returns='unicode') +def neuter_encoding_declaration(source): + """Return `source`, with any encoding declaration neutered. 
+ + This function will only ever be called on `source` that has an encoding + declaration, so some edge cases can be ignored. + + """ + source = COOKIE_RE.sub("# (deleted declaration)", source) + return source |