diff options
Diffstat (limited to 'numpy/distutils/ccompiler_opt.py')
-rw-r--r-- | numpy/distutils/ccompiler_opt.py | 330 |
1 files changed, 181 insertions, 149 deletions
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index 3eba6e32a..47d07ad4a 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -259,7 +259,7 @@ class _Config: AVX512_SKX = dict( interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ", detect="AVX512_SKX", implies_detect=False, - extra_checks="AVX512BW_MASK" + extra_checks="AVX512BW_MASK AVX512DQ_MASK" ), AVX512_CLX = dict( interest=43, implies="AVX512_SKX", group="AVX512VNNI", @@ -276,7 +276,7 @@ class _Config: ), # IBM/Power ## Power7/ISA 2.06 - VSX = dict(interest=1, headers="altivec.h"), + VSX = dict(interest=1, headers="altivec.h", extra_checks="VSX_ASM"), ## Power8/ISA 2.07 VSX2 = dict(interest=2, implies="VSX", implies_detect=False), ## Power9/ISA 3.00 @@ -543,16 +543,16 @@ class _Distutils: def __init__(self, ccompiler): self._ccompiler = ccompiler - def dist_compile(self, sources, flags, **kwargs): + def dist_compile(self, sources, flags, ccompiler=None, **kwargs): """Wrap CCompiler.compile()""" assert(isinstance(sources, list)) assert(isinstance(flags, list)) flags = kwargs.pop("extra_postargs", []) + flags - return self._ccompiler.compile( - sources, extra_postargs=flags, **kwargs - ) + if not ccompiler: + ccompiler = self._ccompiler + return ccompiler.compile(sources, extra_postargs=flags, **kwargs) - def dist_test(self, source, flags): + def dist_test(self, source, flags, macros=[]): """Return True if 'CCompiler.compile()' able to compile a source file with certain flags. """ @@ -569,7 +569,7 @@ class _Distutils: test = False try: self.dist_compile( - [source], flags, output_dir=self.conf_tmp_path + [source], flags, macros=macros, output_dir=self.conf_tmp_path ) test = True except CompileError as e: @@ -579,45 +579,41 @@ class _Distutils: return test def dist_info(self): - """Return a string containing all environment information, required - by the abstract class '_CCompiler' to discovering the platform - environment, also used as a cache factor in order to detect - any changes from outside. + """ + Return a tuple containing info about (platform, compiler, extra_args), + required by the abstract class '_CCompiler' for discovering the + platform environment. This is also used as a cache factor in order + to detect any changes happening from outside. """ if hasattr(self, "_dist_info"): return self._dist_info - # play it safe - cc_info = "" - compiler = getattr(self._ccompiler, "compiler", None) - if compiler is not None: - if isinstance(compiler, str): - cc_info += compiler - elif hasattr(compiler, "__iter__"): - cc_info += ' '.join(compiler) - # in case if 'compiler' attribute doesn't provide anything - cc_type = getattr(self._ccompiler, "compiler_type", "") - if cc_type in ("intelem", "intelemw", "mingw64"): - cc_info += "x86_64" + + cc_type = getattr(self._ccompiler, "compiler_type", '') + if cc_type in ("intelem", "intelemw"): + platform = "x86_64" elif cc_type in ("intel", "intelw", "intele"): - cc_info += "x86" - elif cc_type in ("msvc", "mingw32"): - import platform - if platform.architecture()[0] == "32bit": - cc_info += "x86" + platform = "x86" + else: + from distutils.util import get_platform + platform = get_platform() + + cc_info = getattr(self._ccompiler, "compiler", getattr(self._ccompiler, "compiler_so", '')) + if not cc_type or cc_type == "unix": + if hasattr(cc_info, "__iter__"): + compiler = cc_info[0] else: - cc_info += "x86_64" + compiler = str(cc_info) else: - # the last hope, too bad for cross-compiling - import platform - cc_info += platform.machine() + compiler = cc_type - cc_info += cc_type - cflags = os.environ.get("CFLAGS", "") - if cflags not in cc_info: - cc_info += cflags + if hasattr(cc_info, "__iter__") and len(cc_info) > 1: + extra_args = ' '.join(cc_info[1:]) + else: + extra_args = os.environ.get("CFLAGS", "") + extra_args += os.environ.get("CPPFLAGS", "") - self._dist_info = cc_info - return cc_info + self._dist_info = (platform, compiler, extra_args) + return self._dist_info @staticmethod def dist_error(*args): @@ -751,12 +747,14 @@ class _Cache: self.cache_me = {} self.cache_private = set() self.cache_infile = False + self._cache_path = None if self.conf_nocache: self.dist_log("cache is disabled by `Config`") return - chash = self.cache_hash(*factors, *self.conf_cache_factors) + self._cache_hash = self.cache_hash(*factors, *self.conf_cache_factors) + self._cache_path = cache_path if cache_path: if os.path.exists(cache_path): self.dist_log("load cache from file ->", cache_path) @@ -769,7 +767,7 @@ class _Cache: elif not hasattr(cache_mod, "hash") or \ not hasattr(cache_mod, "data"): self.dist_log("invalid cache file", stderr=True) - elif chash == cache_mod.hash: + elif self._cache_hash == cache_mod.hash: self.dist_log("hit the file cache") for attr, val in cache_mod.data.items(): setattr(self, attr, val) @@ -777,10 +775,8 @@ class _Cache: else: self.dist_log("miss the file cache") - atexit.register(self._cache_write, cache_path, chash) - if not self.cache_infile: - other_cache = _share_cache.get(chash) + other_cache = _share_cache.get(self._cache_hash) if other_cache: self.dist_log("hit the memory cache") for attr, val in other_cache.__dict__.items(): @@ -789,32 +785,41 @@ class _Cache: continue setattr(self, attr, val) - _share_cache[chash] = self + _share_cache[self._cache_hash] = self + atexit.register(self.cache_flush) def __del__(self): - # TODO: remove the cache form share on del - pass + for h, o in _share_cache.items(): + if o == self: + _share_cache.pop(h) + break - def _cache_write(self, cache_path, cache_hash): + def cache_flush(self): + """ + Force update the cache. + """ + if not self._cache_path: + return # TODO: don't write if the cache doesn't change - self.dist_log("write cache to path ->", cache_path) - for attr in list(self.__dict__.keys()): + self.dist_log("write cache to path ->", self._cache_path) + cdict = self.__dict__.copy() + for attr in self.__dict__.keys(): if re.match(self._cache_ignore, attr): - self.__dict__.pop(attr) + cdict.pop(attr) - d = os.path.dirname(cache_path) + d = os.path.dirname(self._cache_path) if not os.path.exists(d): os.makedirs(d) - repr_dict = pprint.pformat(self.__dict__, compact=True) - with open(cache_path, "w") as f: + repr_dict = pprint.pformat(cdict, compact=True) + with open(self._cache_path, "w") as f: f.write(textwrap.dedent("""\ # AUTOGENERATED DON'T EDIT # Please make changes to the code generator \ (distutils/ccompiler_opt.py) hash = {} data = \\ - """).format(cache_hash)) + """).format(self._cache_hash)) f.write(repr_dict) def cache_hash(self, *factors): @@ -845,7 +850,7 @@ class _Cache: return ccb return cache_wrap_me -class _CCompiler(object): +class _CCompiler: """A helper class for `CCompilerOpt` containing all utilities that related to the fundamental compiler's functions. @@ -893,57 +898,56 @@ class _CCompiler(object): def __init__(self): if hasattr(self, "cc_is_cached"): return - to_detect = ( - # attr regex - ( - ("cc_on_x64", "^(x|x86_|amd)64"), - ("cc_on_x86", "^(x86|i386|i686)"), - ("cc_on_ppc64le", "^(powerpc|ppc)64(el|le)"), - ("cc_on_ppc64", "^(powerpc|ppc)64"), - ("cc_on_armhf", "^arm"), - ("cc_on_aarch64", "^aarch64"), - # priority is given to first of string - # if it fail we search in the rest, due - # to append platform.machine() at the end, - # check method 'dist_info()' for more clarification. - ("cc_on_x64", ".*(x|x86_|amd)64.*"), - ("cc_on_x86", ".*(x86|i386|i686).*"), - ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"), - ("cc_on_ppc64", ".*(powerpc|ppc)64.*"), - ("cc_on_armhf", ".*arm.*"), - ("cc_on_aarch64", ".*aarch64.*"), - # undefined platform - ("cc_on_noarch", ""), - ), - ( - ("cc_is_gcc", r".*(gcc|gnu\-g).*"), - ("cc_is_clang", ".*clang.*"), - ("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like - ("cc_is_icc", ".*(intel|icc).*"), # intel unix like - ("cc_is_msvc", ".*msvc.*"), - ("cc_is_nocc", ""), - ), - (("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"),), - (("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"),), - # in case if the class run with -DNPY_DISABLE_OPTIMIZATION - (("cc_noopt", ".*DISABLE_OPT.*"),), + # attr regex + detect_arch = ( + ("cc_on_x64", ".*(x|x86_|amd)64.*"), + ("cc_on_x86", ".*(win32|x86|i386|i686).*"), + ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"), + ("cc_on_ppc64", ".*(powerpc|ppc)64.*"), + ("cc_on_aarch64", ".*(aarch64|arm64).*"), + ("cc_on_armhf", ".*arm.*"), + # undefined platform + ("cc_on_noarch", ""), + ) + detect_compiler = ( + ("cc_is_gcc", r".*(gcc|gnu\-g).*"), + ("cc_is_clang", ".*clang.*"), + ("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like + ("cc_is_icc", ".*(intel|icc).*"), # intel unix like + ("cc_is_msvc", ".*msvc.*"), + # undefined compiler will be treat it as gcc + ("cc_is_nocc", ""), + ) + detect_args = ( + ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"), + ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"), + # in case if the class run with -DNPY_DISABLE_OPTIMIZATION + ("cc_noopt", ".*DISABLE_OPT.*"), ) - for section in to_detect: - for attr, rgex in section: - setattr(self, attr, False) dist_info = self.dist_info() - for section in to_detect: + platform, compiler_info, extra_args = dist_info + # set False to all attrs + for section in (detect_arch, detect_compiler, detect_args): for attr, rgex in section: - if rgex and not re.match(rgex, dist_info, re.IGNORECASE): + setattr(self, attr, False) + + for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)): + for attr, rgex in detect: + if rgex and not re.match(rgex, searchin, re.IGNORECASE): continue setattr(self, attr, True) break + for attr, rgex in detect_args: + if rgex and not re.match(rgex, extra_args, re.IGNORECASE): + continue + setattr(self, attr, True) + if self.cc_on_noarch: self.dist_log( - "unable to detect CPU arch via compiler info, " - "optimization is disabled \ninfo << %s >> " % dist_info, + "unable to detect CPU architecture which lead to disable the optimization. " + f"check dist_info:<<\n{dist_info}\n>>", stderr=True ) self.cc_noopt = True @@ -958,8 +962,9 @@ class _CCompiler(object): but still has the same gcc optimization flags. """ self.dist_log( - "unable to detect compiler name via info <<\n%s\n>> " - "treating it as a gcc" % dist_info, + "unable to detect compiler type which leads to treating it as GCC. " + "this is a normal behavior if you're using gcc-like compiler such as MinGW or IBM/XLC." + f"check dist_info:<<\n{dist_info}\n>>", stderr=True ) self.cc_is_gcc = True @@ -1167,20 +1172,23 @@ class _Feature: self.feature_is_cached = True - def feature_names(self, names=None, force_flags=None): + def feature_names(self, names=None, force_flags=None, macros=[]): """ Returns a set of CPU feature names that supported by platform and the **C** compiler. Parameters ---------- - 'names': sequence or None, optional + names: sequence or None, optional Specify certain CPU features to test it against the **C** compiler. if None(default), it will test all current supported features. **Note**: feature names must be in upper-case. - 'force_flags': list or None, optional - If None(default), default compiler flags for every CPU feature will be used - during the test. + force_flags: list or None, optional + If None(default), default compiler flags for every CPU feature will + be used during the test. + + macros : list of tuples, optional + A list of C macro definitions. """ assert( names is None or ( @@ -1193,7 +1201,9 @@ class _Feature: names = self.feature_supported.keys() supported_names = set() for f in names: - if self.feature_is_supported(f, force_flags=force_flags): + if self.feature_is_supported( + f, force_flags=force_flags, macros=macros + ): supported_names.add(f) return supported_names @@ -1428,20 +1438,23 @@ class _Feature: return self.cc_normalize_flags(flags) @_Cache.me - def feature_test(self, name, force_flags=None): + def feature_test(self, name, force_flags=None, macros=[]): """ Test a certain CPU feature against the compiler through its own check file. Parameters ---------- - 'name': str + name: str Supported CPU feature name. - 'force_flags': list or None, optional + force_flags: list or None, optional If None(default), the returned flags from `feature_flags()` will be used. - """ + + macros : list of tuples, optional + A list of C macro definitions. + """ if force_flags is None: force_flags = self.feature_flags(name) @@ -1457,24 +1470,29 @@ class _Feature: if not os.path.exists(test_path): self.dist_fatal("feature test file is not exist", test_path) - test = self.dist_test(test_path, force_flags + self.cc_flags["werror"]) + test = self.dist_test( + test_path, force_flags + self.cc_flags["werror"], macros=macros + ) if not test: self.dist_log("testing failed", stderr=True) return test @_Cache.me - def feature_is_supported(self, name, force_flags=None): + def feature_is_supported(self, name, force_flags=None, macros=[]): """ Check if a certain CPU feature is supported by the platform and compiler. Parameters ---------- - 'name': str + name: str CPU feature name in uppercase. - 'force_flags': list or None, optional - If None(default), default compiler flags for every CPU feature will be used - during test. + force_flags: list or None, optional + If None(default), default compiler flags for every CPU feature will + be used during test. + + macros : list of tuples, optional + A list of C macro definitions. """ assert(name.isupper()) assert(force_flags is None or isinstance(force_flags, list)) @@ -1482,9 +1500,9 @@ class _Feature: supported = name in self.feature_supported if supported: for impl in self.feature_implies(name): - if not self.feature_test(impl, force_flags): + if not self.feature_test(impl, force_flags, macros=macros): return False - if not self.feature_test(name, force_flags): + if not self.feature_test(name, force_flags, macros=macros): return False return supported @@ -1774,7 +1792,7 @@ class _Parse: tokens = tokens[start_pos:end_pos] return self._parse_target_tokens(tokens) - _parse_regex_arg = re.compile(r'\s|[,]|([+-])') + _parse_regex_arg = re.compile(r'\s|,|([+-])') def _parse_arg_features(self, arg_name, req_features): if not isinstance(req_features, str): self.dist_fatal("expected a string in '%s'" % arg_name) @@ -1807,7 +1825,9 @@ class _Parse: self.dist_fatal(arg_name, "native option isn't supported by the compiler" ) - features_to = self.feature_names(force_flags=native) + features_to = self.feature_names( + force_flags=native, macros=[("DETECT_FEATURES", 1)] + ) elif TOK == "MAX": features_to = self.feature_supported.keys() elif TOK == "MIN": @@ -2147,7 +2167,7 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): """ return self.parse_dispatch_names - def try_dispatch(self, sources, src_dir=None, **kwargs): + def try_dispatch(self, sources, src_dir=None, ccompiler=None, **kwargs): """ Compile one or more dispatch-able sources and generates object files, also generates abstract C config headers and macros that @@ -2170,6 +2190,11 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): Path of parent directory for the generated headers and wrapped sources. If None(default) the files will generated in-place. + ccompiler: CCompiler + Distutils `CCompiler` instance to be used for compilation. + If None (default), the provided instance during the initialization + will be used instead. + **kwargs : any Arguments to pass on to the `CCompiler.compile()` @@ -2224,13 +2249,15 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): # among them. objects = [] for flags, srcs in to_compile.items(): - objects += self.dist_compile(srcs, list(flags), **kwargs) + objects += self.dist_compile( + srcs, list(flags), ccompiler=ccompiler, **kwargs + ) return objects def generate_dispatch_header(self, header_path): """ - Generate the dispatch header which containing all definitions - and headers of instruction-sets for the enabled CPU baseline and + Generate the dispatch header which contains the #definitions and headers + for platform-specific instruction-sets for the enabled CPU baseline and dispatch-able features. Its highly recommended to take a look at the generated header @@ -2244,6 +2271,14 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): baseline_len = len(baseline_names) dispatch_len = len(dispatch_names) + header_dir = os.path.dirname(header_path) + if not os.path.exists(header_dir): + self.dist_log( + f"dispatch header dir {header_dir} does not exist, creating it", + stderr=True + ) + os.makedirs(header_dir) + with open(header_path, 'w') as f: baseline_calls = ' \\\n'.join([ ( @@ -2304,19 +2339,25 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): def report(self, full=False): report = [] + platform_rows = [] baseline_rows = [] dispatch_rows = [] + report.append(("Platform", platform_rows)) + report.append(("", "")) report.append(("CPU baseline", baseline_rows)) report.append(("", "")) report.append(("CPU dispatch", dispatch_rows)) + ########## platform ########## + platform_rows.append(("Architecture", ( + "unsupported" if self.cc_on_noarch else self.cc_march) + )) + platform_rows.append(("Compiler", ( + "unix-like" if self.cc_is_nocc else self.cc_name) + )) ########## baseline ########## if self.cc_noopt: - baseline_rows.append(( - "Requested", "optimization disabled %s" % ( - "(unsupported arch)" if self.cc_on_noarch else "" - ) - )) + baseline_rows.append(("Requested", "optimization disabled")) else: baseline_rows.append(("Requested", repr(self._requested_baseline))) @@ -2337,11 +2378,7 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): ########## dispatch ########## if self.cc_noopt: - dispatch_rows.append(( - "Requested", "optimization disabled %s" % ( - "(unsupported arch)" if self.cc_on_noarch else "" - ) - )) + baseline_rows.append(("Requested", "optimization disabled")) else: dispatch_rows.append(("Requested", repr(self._requested_dispatch))) @@ -2448,7 +2485,8 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): return wrap_path def _generate_config(self, output_dir, dispatch_src, targets, has_baseline=False): - config_path = os.path.basename(dispatch_src).replace(".c", ".h") + config_path = os.path.basename(dispatch_src) + config_path = os.path.splitext(config_path)[0] + '.h' config_path = os.path.join(output_dir, config_path) # check if targets didn't change to avoid recompiling cache_hash = self.cache_hash(targets, has_baseline) @@ -2506,30 +2544,24 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): )) return False -def new_ccompiler_opt(compiler, **kwargs): +def new_ccompiler_opt(compiler, dispatch_hpath, **kwargs): """ Create a new instance of 'CCompilerOpt' and generate the dispatch header - inside NumPy source dir. + which contains the #definitions and headers of platform-specific instruction-sets for + the enabled CPU baseline and dispatch-able features. Parameters ---------- - 'compiler' : CCompiler instance - '**kwargs': passed as-is to `CCompilerOpt(...)` + compiler : CCompiler instance + dispatch_hpath : str + path of the dispatch header + **kwargs: passed as-is to `CCompilerOpt(...)` Returns ------- new instance of CCompilerOpt """ opt = CCompilerOpt(compiler, **kwargs) - npy_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - header_dir = os.path.join(npy_path, *("core/src/common".split("/"))) - header_path = os.path.join(header_dir, "_cpu_dispatch.h") - if not os.path.exists(header_path) or not opt.is_cached(): - if not os.path.exists(header_dir): - opt.dist_log( - "dispatch header dir '%s' isn't exist, creating it" % header_dir, - stderr=True - ) - os.makedirs(header_dir) - opt.generate_dispatch_header(header_path) + if not os.path.exists(dispatch_hpath) or not opt.is_cached(): + opt.generate_dispatch_header(dispatch_hpath) return opt |