diff options
Diffstat (limited to 'Cython/Utils.py')
-rw-r--r-- | Cython/Utils.py | 317 |
1 file changed, 253 insertions, 64 deletions
diff --git a/Cython/Utils.py b/Cython/Utils.py index 13f83fb75..ffcee9dc3 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -1,10 +1,18 @@ -# -# Cython -- Things that don't belong -# anywhere else in particular -# +""" +Cython -- Things that don't belong anywhere else in particular +""" from __future__ import absolute_import +import cython + +cython.declare( + basestring=object, + os=object, sys=object, re=object, io=object, codecs=object, glob=object, shutil=object, tempfile=object, + cython_version=object, + _function_caches=list, _parse_file_version=object, _match_file_encoding=object, +) + try: from __builtin__ import basestring except ImportError: @@ -20,31 +28,97 @@ import sys import re import io import codecs +import glob import shutil import tempfile -from contextlib import contextmanager +from functools import wraps + +from . import __version__ as cython_version + +PACKAGE_FILES = ("__init__.py", "__init__.pyc", "__init__.pyx", "__init__.pxd") + +_build_cache_name = "__{0}_cache".format +_CACHE_NAME_PATTERN = re.compile(r"^__(.+)_cache$") modification_time = os.path.getmtime +GENERATED_BY_MARKER = "/* Generated by Cython %s */" % cython_version +GENERATED_BY_MARKER_BYTES = GENERATED_BY_MARKER.encode('us-ascii') + + +class _TryFinallyGeneratorContextManager(object): + """ + Fast, bare minimum @contextmanager, only for try-finally, not for exception handling. 
+ """ + def __init__(self, gen): + self._gen = gen + + def __enter__(self): + return next(self._gen) + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + next(self._gen) + except (StopIteration, GeneratorExit): + pass + + +def try_finally_contextmanager(gen_func): + @wraps(gen_func) + def make_gen(*args, **kwargs): + return _TryFinallyGeneratorContextManager(gen_func(*args, **kwargs)) + return make_gen + + _function_caches = [] + + def clear_function_caches(): for cache in _function_caches: cache.clear() + def cached_function(f): cache = {} _function_caches.append(cache) uncomputed = object() + + @wraps(f) def wrapper(*args): res = cache.get(args, uncomputed) if res is uncomputed: res = cache[args] = f(*args) return res + wrapper.uncached = f return wrapper + +def _find_cache_attributes(obj): + """The function iterates over the attributes of the object and, + if it finds the name of the cache, it returns it and the corresponding method name. + The method may not be present in the object. + """ + for attr_name in dir(obj): + match = _CACHE_NAME_PATTERN.match(attr_name) + if match is not None: + yield attr_name, match.group(1) + + +def clear_method_caches(obj): + """Removes every cache found in the object, + if a corresponding method exists for that cache. 
+ """ + for cache_name, method_name in _find_cache_attributes(obj): + if hasattr(obj, method_name): + delattr(obj, cache_name) + # if there is no corresponding method, then we assume + # that this attribute was not created by our cached method + + def cached_method(f): - cache_name = '__%s_cache' % f.__name__ + cache_name = _build_cache_name(f.__name__) + def wrapper(self, *args): cache = getattr(self, cache_name, None) if cache is None: @@ -54,8 +128,10 @@ def cached_method(f): return cache[args] res = cache[args] = f(self, *args) return res + return wrapper + def replace_suffix(path, newsuf): base, _ = os.path.splitext(path) return base + newsuf @@ -81,6 +157,9 @@ def castrate_file(path, st): # failed compilation. # Also sets access and modification times back to # those specified by st (a stat struct). + if not is_cython_generated_file(path, allow_failed=True, if_not_found=False): + return + try: f = open_new_file(path) except EnvironmentError: @@ -92,6 +171,42 @@ def castrate_file(path, st): if st: os.utime(path, (st.st_atime, st.st_mtime-1)) + +def is_cython_generated_file(path, allow_failed=False, if_not_found=True): + failure_marker = b"#error Do not use this file, it is the result of a failed Cython compilation." + file_content = None + if os.path.exists(path): + try: + with open(path, "rb") as f: + file_content = f.read(len(failure_marker)) + except (OSError, IOError): + pass # Probably just doesn't exist any more + + if file_content is None: + # file does not exist (yet) + return if_not_found + + return ( + # Cython C file? + file_content.startswith(b"/* Generated by Cython ") or + # Cython output file after previous failures? + (allow_failed and file_content == failure_marker) or + # Let's allow overwriting empty files as well. They might have resulted from previous failures. 
+ not file_content + ) + + +def file_generated_by_this_cython(path): + file_content = b'' + if os.path.exists(path): + try: + with open(path, "rb") as f: + file_content = f.read(len(GENERATED_BY_MARKER_BYTES)) + except (OSError, IOError): + pass # Probably just doesn't exist any more + return file_content and file_content.startswith(GENERATED_BY_MARKER_BYTES) + + def file_newer_than(path, time): ftime = modification_time(path) return ftime > time @@ -134,24 +249,31 @@ def find_root_package_dir(file_path): else: return dir + @cached_function -def check_package_dir(dir, package_names): +def check_package_dir(dir_path, package_names): + namespace = True for dirname in package_names: - dir = os.path.join(dir, dirname) - if not is_package_dir(dir): - return None - return dir + dir_path = os.path.join(dir_path, dirname) + has_init = contains_init(dir_path) + if has_init: + namespace = False + return dir_path, namespace + @cached_function -def is_package_dir(dir_path): - for filename in ("__init__.py", - "__init__.pyc", - "__init__.pyx", - "__init__.pxd"): +def contains_init(dir_path): + for filename in PACKAGE_FILES: path = os.path.join(dir_path, filename) if path_exists(path): return 1 + +def is_package_dir(dir_path): + if contains_init(dir_path): + return 1 + + @cached_function def path_exists(path): # try on the filesystem first @@ -176,6 +298,40 @@ def path_exists(path): pass return False + +_parse_file_version = re.compile(r".*[.]cython-([0-9]+)[.][^./\\]+$").findall + + +@cached_function +def find_versioned_file(directory, filename, suffix, + _current_version=int(re.sub(r"^([0-9]+)[.]([0-9]+).*", r"\1\2", cython_version))): + """ + Search a directory for versioned pxd files, e.g. "lib.cython-30.pxd" for a Cython 3.0+ version. + + @param directory: the directory to search + @param filename: the filename without suffix + @param suffix: the filename extension including the dot, e.g. 
".pxd" + @return: the file path if found, or None + """ + assert not suffix or suffix[:1] == '.' + path_prefix = os.path.join(directory, filename) + + matching_files = glob.glob(path_prefix + ".cython-*" + suffix) + path = path_prefix + suffix + if not os.path.exists(path): + path = None + best_match = (-1, path) # last resort, if we do not have versioned .pxd files + + for path in matching_files: + versions = _parse_file_version(path) + if versions: + int_version = int(versions[0]) + # Let's assume no duplicates. + if best_match[0] < int_version <= _current_version: + best_match = (int_version, path) + return best_match[1] + + # file name encodings def decode_filename(filename): @@ -189,12 +345,13 @@ def decode_filename(filename): pass return filename + # support for source file encoding detection _match_file_encoding = re.compile(br"(\w*coding)[:=]\s*([-\w.]+)").search -def detect_opened_file_encoding(f): +def detect_opened_file_encoding(f, default='UTF-8'): # PEPs 263 and 3120 # Most of the time the first two lines fall in the first couple of hundred chars, # and this bulk read/split is much faster. 
@@ -206,6 +363,7 @@ def detect_opened_file_encoding(f): lines = start.split(b"\n") if not data: break + m = _match_file_encoding(lines[0]) if m and m.group(1) != b'c_string_encoding': return m.group(2).decode('iso8859-1') @@ -213,7 +371,7 @@ def detect_opened_file_encoding(f): m = _match_file_encoding(lines[1]) if m: return m.group(2).decode('iso8859-1') - return "UTF-8" + return default def skip_bom(f): @@ -333,7 +491,7 @@ def get_cython_cache_dir(): return os.path.expanduser(os.path.join('~', '.cython')) -@contextmanager +@try_finally_contextmanager def captured_fd(stream=2, encoding=None): orig_stream = os.dup(stream) # keep copy of original stream try: @@ -345,19 +503,49 @@ def captured_fd(stream=2, encoding=None): return _output[0] os.dup2(temp_file.fileno(), stream) # replace stream by copy of pipe - try: - def get_output(): - result = read_output() - return result.decode(encoding) if encoding else result - - yield get_output - finally: - os.dup2(orig_stream, stream) # restore original stream - read_output() # keep the output in case it's used after closing the context manager + def get_output(): + result = read_output() + return result.decode(encoding) if encoding else result + + yield get_output + # note: @contextlib.contextmanager requires try-finally here + os.dup2(orig_stream, stream) # restore original stream + read_output() # keep the output in case it's used after closing the context manager finally: os.close(orig_stream) +def get_encoding_candidates(): + candidates = [sys.getdefaultencoding()] + for stream in (sys.stdout, sys.stdin, sys.__stdout__, sys.__stdin__): + encoding = getattr(stream, 'encoding', None) + # encoding might be None (e.g. 
somebody redirects stdout): + if encoding is not None and encoding not in candidates: + candidates.append(encoding) + return candidates + + +def prepare_captured(captured): + captured_bytes = captured.strip() + if not captured_bytes: + return None + for encoding in get_encoding_candidates(): + try: + return captured_bytes.decode(encoding) + except UnicodeDecodeError: + pass + # last resort: print at least the readable ascii parts correctly. + return captured_bytes.decode('latin-1') + + +def print_captured(captured, output, header_line=None): + captured = prepare_captured(captured) + if captured: + if header_line: + output.write(header_line) + output.write(captured) + + def print_bytes(s, header_text=None, end=b'\n', file=sys.stdout, flush=True): if header_text: file.write(header_text) # note: text! => file.write() instead of out.write() @@ -372,33 +560,29 @@ def print_bytes(s, header_text=None, end=b'\n', file=sys.stdout, flush=True): if flush: out.flush() -class LazyStr: - def __init__(self, callback): - self.callback = callback - def __str__(self): - return self.callback() - def __repr__(self): - return self.callback() - def __add__(self, right): - return self.callback() + right - def __radd__(self, left): - return left + self.callback() - class OrderedSet(object): - def __init__(self, elements=()): - self._list = [] - self._set = set() - self.update(elements) - def __iter__(self): - return iter(self._list) - def update(self, elements): - for e in elements: - self.add(e) - def add(self, e): - if e not in self._set: - self._list.append(e) - self._set.add(e) + def __init__(self, elements=()): + self._list = [] + self._set = set() + self.update(elements) + + def __iter__(self): + return iter(self._list) + + def update(self, elements): + for e in elements: + self.add(e) + + def add(self, e): + if e not in self._set: + self._list.append(e) + self._set.add(e) + + def __bool__(self): + return bool(self._set) + + __nonzero__ = __bool__ # Class decorator that adds a 
metaclass and recreates the class with it. @@ -420,24 +604,30 @@ def add_metaclass(metaclass): def raise_error_if_module_name_forbidden(full_module_name): - #it is bad idea to call the pyx-file cython.pyx, so fail early + # it is bad idea to call the pyx-file cython.pyx, so fail early if full_module_name == 'cython' or full_module_name.startswith('cython.'): raise ValueError('cython is a special module, cannot be used as a module name') def build_hex_version(version_string): """ - Parse and translate '4.3a1' into the readable hex representation '0x040300A1' (like PY_VERSION_HEX). + Parse and translate public version identifier like '4.3a1' into the readable hex representation '0x040300A1' (like PY_VERSION_HEX). + + SEE: https://peps.python.org/pep-0440/#public-version-identifiers """ - # First, parse '4.12a1' into [4, 12, 0, 0xA01]. + # Parse '4.12a1' into [4, 12, 0, 0xA01] + # And ignore .dev, .pre and .post segments digits = [] release_status = 0xF0 - for digit in re.split('([.abrc]+)', version_string): - if digit in ('a', 'b', 'rc'): - release_status = {'a': 0xA0, 'b': 0xB0, 'rc': 0xC0}[digit] + for segment in re.split(r'(\D+)', version_string): + if segment in ('a', 'b', 'rc'): + release_status = {'a': 0xA0, 'b': 0xB0, 'rc': 0xC0}[segment] digits = (digits + [0, 0])[:3] # 1.2a1 -> 1.2.0a1 - elif digit != '.': - digits.append(int(digit)) + elif segment in ('.dev', '.pre', '.post'): + break # break since those are the last segments + elif segment != '.': + digits.append(int(segment)) + digits = (digits + [0] * 3)[:4] digits[3] += release_status @@ -457,15 +647,14 @@ def write_depfile(target, source, dependencies): # paths below the base_dir are relative, otherwise absolute paths = [] for fname in dependencies: - fname = os.path.abspath(fname) if fname.startswith(src_base_dir): try: newpath = os.path.relpath(fname, cwd) except ValueError: # if they are on different Windows drives, absolute is fine - newpath = fname + newpath = os.path.abspath(fname) else: - 
newpath = fname + newpath = os.path.abspath(fname) paths.append(newpath) depline = os.path.relpath(target, cwd) + ": \\\n " |