summaryrefslogtreecommitdiff
path: root/coverage/phystokens.py
diff options
context:
space:
mode:
authorNed Batchelder <ned@nedbatchelder.com>2021-05-01 13:02:31 -0400
committerNed Batchelder <ned@nedbatchelder.com>2021-05-01 13:30:39 -0400
commit9df434550a499c16e9fd26cfb9627837bfdc02a5 (patch)
tree5619ea3c3bec05d04363a66ced9c7ebffcefb1df /coverage/phystokens.py
parent3fe17c1f2244c07cf9d0f9e3609392c2ad441db1 (diff)
downloadpython-coveragepy-git-9df434550a499c16e9fd26cfb9627837bfdc02a5.tar.gz
refactor: remove code explicitly choosing between py2 and py3
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r--coverage/phystokens.py110
1 file changed, 2 insertions, 108 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 54378b3b..7556d310 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -3,15 +3,12 @@
"""Better tokenizing for coverage.py."""
-import codecs
import keyword
import re
-import sys
import token
import tokenize
-from coverage import env
-from coverage.backward import iternext, unicode_class
+from coverage.backward import iternext
from coverage.misc import contract
@@ -154,102 +151,7 @@ generate_tokens = CachedTokenizer().generate_tokens
COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)
@contract(source='bytes')
-def _source_encoding_py2(source):
- """Determine the encoding for `source`, according to PEP 263.
-
- `source` is a byte string, the text of the program.
-
- Returns a string, the name of the encoding.
-
- """
- assert isinstance(source, bytes)
-
- # Do this so the detect_encode code we copied will work.
- readline = iternext(source.splitlines(True))
-
- # This is mostly code adapted from Py3.2's tokenize module.
-
- def _get_normal_name(orig_enc):
- """Imitates get_normal_name in tokenizer.c."""
- # Only care about the first 12 characters.
- enc = orig_enc[:12].lower().replace("_", "-")
- if re.match(r"^utf-8($|-)", enc):
- return "utf-8"
- if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc):
- return "iso-8859-1"
- return orig_enc
-
- # From detect_encode():
- # It detects the encoding from the presence of a UTF-8 BOM or an encoding
- # cookie as specified in PEP-0263. If both a BOM and a cookie are present,
- # but disagree, a SyntaxError will be raised. If the encoding cookie is an
- # invalid charset, raise a SyntaxError. Note that if a UTF-8 BOM is found,
- # 'utf-8-sig' is returned.
-
- # If no encoding is specified, then the default will be returned.
- default = 'ascii'
-
- bom_found = False
- encoding = None
-
- def read_or_stop():
- """Get the next source line, or ''."""
- try:
- return readline()
- except StopIteration:
- return ''
-
- def find_cookie(line):
- """Find an encoding cookie in `line`."""
- try:
- line_string = line.decode('ascii')
- except UnicodeDecodeError:
- return None
-
- matches = COOKIE_RE.findall(line_string)
- if not matches:
- return None
- encoding = _get_normal_name(matches[0])
- try:
- codec = codecs.lookup(encoding)
- except LookupError:
- # This behavior mimics the Python interpreter
- raise SyntaxError("unknown encoding: " + encoding)
-
- if bom_found:
- # codecs in 2.3 were raw tuples of functions, assume the best.
- codec_name = getattr(codec, 'name', encoding)
- if codec_name != 'utf-8':
- # This behavior mimics the Python interpreter
- raise SyntaxError('encoding problem: utf-8')
- encoding += '-sig'
- return encoding
-
- first = read_or_stop()
- if first.startswith(codecs.BOM_UTF8):
- bom_found = True
- first = first[3:]
- default = 'utf-8-sig'
- if not first:
- return default
-
- encoding = find_cookie(first)
- if encoding:
- return encoding
-
- second = read_or_stop()
- if not second:
- return default
-
- encoding = find_cookie(second)
- if encoding:
- return encoding
-
- return default
-
-
-@contract(source='bytes')
-def _source_encoding_py3(source):
+def source_encoding(source):
"""Determine the encoding for `source`, according to PEP 263.
`source` is a byte string: the text of the program.
@@ -261,12 +163,6 @@ def _source_encoding_py3(source):
return tokenize.detect_encoding(readline)[0]
-if env.PY3:
- source_encoding = _source_encoding_py3
-else:
- source_encoding = _source_encoding_py2
-
-
@contract(source='unicode')
def compile_unicode(source, filename, mode):
"""Just like the `compile` builtin, but works on any Unicode string.
@@ -280,8 +176,6 @@ def compile_unicode(source, filename, mode):
"""
source = neuter_encoding_declaration(source)
- if env.PY2 and isinstance(filename, unicode_class):
- filename = filename.encode(sys.getfilesystemencoding(), "replace")
code = compile(source, filename, mode)
return code