diff options
author | Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> | 2022-10-19 15:55:59 -0700 |
---|---|---|
committer | Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> | 2022-10-19 15:55:59 -0700 |
commit | 4730b6f33f5c699ae1a49471e7e60f81ed5494b5 (patch) | |
tree | 0d18a8bb24f01151d6adb639c7ffc91e1dc63eff | |
parent | 28c8e9224f887a56936cf5f18423b426704334c2 (diff) | |
download | numpy-4730b6f33f5c699ae1a49471e7e60f81ed5494b5.tar.gz |
review feedback, make lint happy
-rw-r--r-- | numpy/core/setup.py | 37 | ||||
-rw-r--r-- | numpy/core/setup.py.orig | 21 | ||||
-rw-r--r-- | numpy/core/setup_common.py | 51 | ||||
-rw-r--r-- | numpy/core/setup_common.py.orig | 462 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 1 |
5 files changed, 60 insertions, 512 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 483e62a34..768d01084 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -173,6 +173,16 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): else: return 1 + # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 + # support on Windows-based platforms + def check_gh14787(fn): + if fn == 'attribute_target_avx512f': + if (sys.platform in ('win32', 'cygwin') and + config.check_compiler_gcc() and + not config.check_gcc_version_at_least(8, 4)): + ext.extra_compile_args.extend( + ['-ffixed-xmm%s' % n for n in range(16, 32)]) + #use_msvc = config.check_decl("_MSC_VER") if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False): raise SystemError("One of the required function to build numpy is not" @@ -223,31 +233,18 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES: if config.check_gcc_function_attribute(dec, fn): moredefs.append((fname2def(fn), 1)) - if fn == 'attribute_target_avx512f': - # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 - # support on Windows-based platforms - if (sys.platform in ('win32', 'cygwin') and - config.check_compiler_gcc() and - not config.check_gcc_version_at_least(8, 4)): - ext.extra_compile_args.extend( - ['-ffixed-xmm%s' % n for n in range(16, 32)]) + check_gh14787(fn) platform = sysconfig.get_platform() - if("x86_64" in platform): + if ("x86_64" in platform): for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX: if config.check_gcc_function_attribute(dec, fn): moredefs.append((fname2def(fn), 1)) - if fn == 'attribute_target_avx512f': - # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 - # support on Windows-based platforms - if (sys.platform in ('win32', 'cygwin') and - config.check_compiler_gcc() and - not config.check_gcc_version_at_least(8, 4)): - ext.extra_compile_args.extend( - ['-ffixed-xmm%s' % n for n in range(16, 32)]) - for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX: - if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, - header): + check_gh14787(fn) + for dec, fn, code, header in ( + OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX): + if config.check_gcc_function_attribute_with_intrinsics( + dec, fn, code, header): moredefs.append((fname2def(fn), 1)) for fn in OPTIONAL_VARIABLE_ATTRIBUTES: diff --git a/numpy/core/setup.py.orig b/numpy/core/setup.py.orig index 65aacfdad..483e62a34 100644 --- a/numpy/core/setup.py.orig +++ b/numpy/core/setup.py.orig @@ -232,10 +232,23 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): ext.extra_compile_args.extend( ['-ffixed-xmm%s' % n for n in range(16, 32)]) - for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS: - if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, - header): - moredefs.append((fname2def(fn), 1)) + platform = sysconfig.get_platform() + if("x86_64" in platform): + for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX: + if config.check_gcc_function_attribute(dec, fn): + moredefs.append((fname2def(fn), 1)) + if fn == 'attribute_target_avx512f': + # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 + # support on Windows-based platforms + if (sys.platform in ('win32', 'cygwin') and + config.check_compiler_gcc() and + not config.check_gcc_version_at_least(8, 4)): + ext.extra_compile_args.extend( + ['-ffixed-xmm%s' % n for n in range(16, 32)]) + for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX: + if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, + header): + moredefs.append((fname2def(fn), 1)) for fn in OPTIONAL_VARIABLE_ATTRIBUTES: if config.check_gcc_variable_attribute(fn): diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 23eda03db..a73f33b3e 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -207,14 +207,14 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', ] OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))', - 'attribute_target_avx'), - ('__attribute__((target ("avx2")))', - 'attribute_target_avx2'), - ('__attribute__((target ("avx512f")))', - 'attribute_target_avx512f'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx'), - ] + 'attribute_target_avx'), + ('__attribute__((target ("avx2")))', + 'attribute_target_avx2'), + ('__attribute__((target ("avx512f")))', + 'attribute_target_avx512f'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx'), + ] # function attributes with intrinsics # To ensure your compiler can compile avx intrinsics with just the attributes @@ -224,23 +224,24 @@ OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))', # The _mm512_castps_si512 instruction is specific check for AVX-512F support # in gcc-4.9 which is missing a subset of intrinsics. See # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [('__attribute__((target("avx2,fma")))', - 'attribute_target_avx2_with_intrinsics', - '__m256 temp = _mm256_set1_ps(1.0); temp = \ - _mm256_fmadd_ps(temp, temp, temp)', - 'immintrin.h'), - ('__attribute__((target("avx512f")))', - 'attribute_target_avx512f_with_intrinsics', - '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', - 'immintrin.h'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx_with_intrinsics', - '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ - __m512i unused_temp = \ - _mm512_castps_si512(_mm512_set1_ps(1.0));\ - _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', - 'immintrin.h'), - ] +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [ + ('__attribute__((target("avx2,fma")))', + 'attribute_target_avx2_with_intrinsics', + '__m256 temp = _mm256_set1_ps(1.0); temp = \ + _mm256_fmadd_ps(temp, temp, temp)', + 'immintrin.h'), + ('__attribute__((target("avx512f")))', + 'attribute_target_avx512f_with_intrinsics', + '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', + 'immintrin.h'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx_with_intrinsics', + '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ + __m512i unused_temp = \ + _mm512_castps_si512(_mm512_set1_ps(1.0));\ + _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', + 'immintrin.h'), + ] def fname2def(name): return "HAVE_%s" % name.upper() diff --git a/numpy/core/setup_common.py.orig b/numpy/core/setup_common.py.orig deleted file mode 100644 index c19a8479f..000000000 --- a/numpy/core/setup_common.py.orig +++ /dev/null @@ -1,462 +0,0 @@ -# Code common to build tools -import copy -import pathlib -import sys -import textwrap - -from numpy.distutils.misc_util import mingw32 - - -#------------------- -# Versioning support -#------------------- -# How to change C_API_VERSION ? -# - increase C_API_VERSION value -# - record the hash for the new C API with the cversions.py script -# and add the hash to cversions.txt -# The hash values are used to remind developers when the C API number was not -# updated - generates a MismatchCAPIWarning warning which is turned into an -# exception for released version. - -# Binary compatibility version number. This number is increased whenever the -# C-API is changed such that binary compatibility is broken, i.e. whenever a -# recompile of extension modules is needed. -C_ABI_VERSION = 0x01000009 - -# Minor API version. This number is increased whenever a change is made to the -# C-API -- whether it breaks binary compatibility or not. Some changes, such -# as adding a function pointer to the end of the function table, can be made -# without breaking binary compatibility. In this case, only the C_API_VERSION -# (*not* C_ABI_VERSION) would be increased. Whenever binary compatibility is -# broken, both C_API_VERSION and C_ABI_VERSION should be increased. -# -# The version needs to be kept in sync with that in cversions.txt. -# -# 0x00000008 - 1.7.x -# 0x00000009 - 1.8.x -# 0x00000009 - 1.9.x -# 0x0000000a - 1.10.x -# 0x0000000a - 1.11.x -# 0x0000000a - 1.12.x -# 0x0000000b - 1.13.x -# 0x0000000c - 1.14.x -# 0x0000000c - 1.15.x -# 0x0000000d - 1.16.x -# 0x0000000d - 1.19.x -# 0x0000000e - 1.20.x -# 0x0000000e - 1.21.x -# 0x0000000f - 1.22.x -# 0x00000010 - 1.23.x -# 0x00000010 - 1.24.x -C_API_VERSION = 0x00000010 - -class MismatchCAPIError(ValueError): - pass - - -def get_api_versions(apiversion, codegen_dir): - """ - Return current C API checksum and the recorded checksum. - - Return current C API checksum and the recorded checksum for the given - version of the C API version. - - """ - # Compute the hash of the current API as defined in the .txt files in - # code_generators - sys.path.insert(0, codegen_dir) - try: - m = __import__('genapi') - numpy_api = __import__('numpy_api') - curapi_hash = m.fullapi_hash(numpy_api.full_api) - apis_hash = m.get_versions_hash() - finally: - del sys.path[0] - - return curapi_hash, apis_hash[apiversion] - -def check_api_version(apiversion, codegen_dir): - """Emits a MismatchCAPIWarning if the C API version needs updating.""" - curapi_hash, api_hash = get_api_versions(apiversion, codegen_dir) - - # If different hash, it means that the api .txt files in - # codegen_dir have been updated without the API version being - # updated. Any modification in those .txt files should be reflected - # in the api and eventually abi versions. - # To compute the checksum of the current API, use numpy/core/cversions.py - if not curapi_hash == api_hash: - msg = ("API mismatch detected, the C API version " - "numbers have to be updated. Current C api version is " - f"{apiversion}, with checksum {curapi_hash}, but recorded " - f"checksum in core/codegen_dir/cversions.txt is {api_hash}. If " - "functions were added in the C API, you have to update " - f"C_API_VERSION in {__file__}." - ) - raise MismatchCAPIError(msg) - - -FUNC_CALL_ARGS = {} - -def set_sig(sig): - prefix, _, args = sig.partition("(") - args = args.rpartition(")")[0] - funcname = prefix.rpartition(" ")[-1] - args = [arg.strip() for arg in args.split(",")] - FUNC_CALL_ARGS[funcname] = ", ".join("(%s) 0" % arg for arg in args) - - -for file in [ - "feature_detection_locale.h", - "feature_detection_math.h", - "feature_detection_misc.h", - "feature_detection_stdio.h", -]: - with open(pathlib.Path(__file__).parent / file) as f: - for line in f: - if line.startswith("#"): - continue - if not line.strip(): - continue - set_sig(line) - -# Mandatory functions: if not found, fail the build -MANDATORY_FUNCS = [ - "sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", - "floor", "ceil", "sqrt", "log10", "log", "exp", "asin", - "acos", "atan", "fmod", 'modf', 'frexp', 'ldexp', - "expm1", "log1p", "acosh", "asinh", "atanh", - "rint", "trunc", "exp2", - "copysign", "nextafter", "strtoll", "strtoull", "cbrt", - "log2", "pow", "hypot", "atan2", -] - -OPTIONAL_LOCALE_FUNCS = ["strtold_l"] -OPTIONAL_FILE_FUNCS = ["ftello", "fseeko", "fallocate"] -OPTIONAL_MISC_FUNCS = ["backtrace", "madvise"] - -# variable attributes tested via "int %s a" % attribute -OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"] - -# Subset of OPTIONAL_*_FUNCS which may already have HAVE_* defined by Python.h -OPTIONAL_FUNCS_MAYBE = [ - "ftello", "fseeko" - ] - -C99_COMPLEX_TYPES = [ - 'complex double', 'complex float', 'complex long double' - ] -C99_COMPLEX_FUNCS = [ - "cabs", "cacos", "cacosh", "carg", "casin", "casinh", "catan", - "catanh", "ccos", "ccosh", "cexp", "cimag", "clog", "conj", "cpow", - "cproj", "creal", "csin", "csinh", "csqrt", "ctan", "ctanh" - ] - -OPTIONAL_HEADERS = [ -# sse headers only enabled automatically on amd64/x32 builds - "xmmintrin.h", # SSE - "emmintrin.h", # SSE2 - "immintrin.h", # AVX - "features.h", # for glibc version linux - "xlocale.h", # see GH#8367 - "dlfcn.h", # dladdr - "sys/mman.h", #madvise -] - -# optional gcc compiler builtins and their call arguments and optional a -# required header and definition name (HAVE_ prepended) -# call arguments are required as the compiler will do strict signature checking -OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), - ("__builtin_isinf", '5.'), - ("__builtin_isfinite", '5.'), - ("__builtin_bswap32", '5u'), - ("__builtin_bswap64", '5u'), - ("__builtin_expect", '5, 0'), - ("__builtin_mul_overflow", '5, 5, (int*)5'), - # MMX only needed for icc, but some clangs don't have it - ("_m_from_int64", '0', "emmintrin.h"), - ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE - ("_mm_prefetch", '(float*)0, _MM_HINT_NTA', - "xmmintrin.h"), # SSE - ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 - ("__builtin_prefetch", "(float*)0, 0, 3"), - # check that the linker can handle avx - ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"', - "stdio.h", "LINK_AVX"), - ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"', - "stdio.h", "LINK_AVX2"), - ("__asm__ volatile", '"vpaddd %zmm1, %zmm2, %zmm3"', - "stdio.h", "LINK_AVX512F"), - ("__asm__ volatile", '"vfpclasspd $0x40, %zmm15, %k6\\n"\ - "vmovdqu8 %xmm0, %xmm1\\n"\ - "vpbroadcastmb2q %k0, %xmm0\\n"', - "stdio.h", "LINK_AVX512_SKX"), - ("__asm__ volatile", '"xgetbv"', "stdio.h", "XGETBV"), - ] - -# function attributes -# tested via "int %s %s(void *);" % (attribute, name) -# function name will be converted to HAVE_<upper-case-name> preprocessor macro -OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', - 'attribute_optimize_unroll_loops'), - ('__attribute__((optimize("O3")))', - 'attribute_optimize_opt_3'), - ('__attribute__((optimize("O2")))', - 'attribute_optimize_opt_2'), - ('__attribute__((nonnull (1)))', - 'attribute_nonnull'), - ('__attribute__((target ("avx")))', - 'attribute_target_avx'), - ('__attribute__((target ("avx2")))', - 'attribute_target_avx2'), - ('__attribute__((target ("avx512f")))', - 'attribute_target_avx512f'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx'), - ] - -# function attributes with intrinsics -# To ensure your compiler can compile avx intrinsics with just the attributes -# gcc 4.8.4 support attributes but not with intrisics -# tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code) -# function name will be converted to HAVE_<upper-case-name> preprocessor macro -# The _mm512_castps_si512 instruction is specific check for AVX-512F support -# in gcc-4.9 which is missing a subset of intrinsics. See -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))', - 'attribute_target_avx2_with_intrinsics', - '__m256 temp = _mm256_set1_ps(1.0); temp = \ - _mm256_fmadd_ps(temp, temp, temp)', - 'immintrin.h'), - ('__attribute__((target("avx512f")))', - 'attribute_target_avx512f_with_intrinsics', - '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', - 'immintrin.h'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx_with_intrinsics', - '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ - __m512i unused_temp = \ - _mm512_castps_si512(_mm512_set1_ps(1.0));\ - _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', - 'immintrin.h'), - ] - -def fname2def(name): - return "HAVE_%s" % name.upper() - -def sym2def(symbol): - define = symbol.replace(' ', '') - return define.upper() - -def type2def(symbol): - define = symbol.replace(' ', '_') - return define.upper() - -# Code to detect long double representation taken from MPFR m4 macro -def check_long_double_representation(cmd): - cmd._check_compiler() - body = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'} - - # Disable whole program optimization (the default on vs2015, with python 3.5+) - # which generates intermediary object files and prevents checking the - # float representation. - if sys.platform == "win32" and not mingw32(): - try: - cmd.compiler.compile_options.remove("/GL") - except (AttributeError, ValueError): - pass - - # Disable multi-file interprocedural optimization in the Intel compiler on Linux - # which generates intermediary object files and prevents checking the - # float representation. - elif (sys.platform != "win32" - and cmd.compiler.compiler_type.startswith('intel') - and '-ipo' in cmd.compiler.cc_exe): - newcompiler = cmd.compiler.cc_exe.replace(' -ipo', '') - cmd.compiler.set_executables( - compiler=newcompiler, - compiler_so=newcompiler, - compiler_cxx=newcompiler, - linker_exe=newcompiler, - linker_so=newcompiler + ' -shared' - ) - - # We need to use _compile because we need the object filename - src, obj = cmd._compile(body, None, None, 'c') - try: - ltype = long_double_representation(pyod(obj)) - return ltype - except ValueError: - # try linking to support CC="gcc -flto" or icc -ipo - # struct needs to be volatile so it isn't optimized away - # additionally "clang -flto" requires the foo struct to be used - body = body.replace('struct', 'volatile struct') - body += "int main(void) { return foo.before[0]; }\n" - src, obj = cmd._compile(body, None, None, 'c') - cmd.temp_files.append("_configtest") - cmd.compiler.link_executable([obj], "_configtest") - ltype = long_double_representation(pyod("_configtest")) - return ltype - finally: - cmd._clean() - -LONG_DOUBLE_REPRESENTATION_SRC = r""" -/* "before" is 16 bytes to ensure there's no padding between it and "x". - * We're not expecting any "long double" bigger than 16 bytes or with - * alignment requirements stricter than 16 bytes. */ -typedef %(type)s test_type; - -struct { - char before[16]; - test_type x; - char after[8]; -} foo = { - { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', - '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' }, - -123456789.0, - { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' } -}; -""" - -def pyod(filename): - """Python implementation of the od UNIX utility (od -b, more exactly). - - Parameters - ---------- - filename : str - name of the file to get the dump from. - - Returns - ------- - out : seq - list of lines of od output - - Notes - ----- - We only implement enough to get the necessary information for long double - representation, this is not intended as a compatible replacement for od. - """ - out = [] - with open(filename, 'rb') as fid: - yo2 = [oct(o)[2:] for o in fid.read()] - for i in range(0, len(yo2), 16): - line = ['%07d' % int(oct(i)[2:])] - line.extend(['%03d' % int(c) for c in yo2[i:i+16]]) - out.append(" ".join(line)) - return out - - -_BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000', - '001', '043', '105', '147', '211', '253', '315', '357'] -_AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020'] - -_IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000'] -_IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1] -_INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353', - '031', '300', '000', '000'] -_INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353', - '031', '300', '000', '000', '000', '000', '000', '000'] -_MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171', - '242', '240', '000', '000', '000', '000'] -_IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', - '000', '000', '000', '000', '000', '000', '000', '000'] -_IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] -_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + - ['000'] * 8) -_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + - ['000'] * 8) - -def long_double_representation(lines): - """Given a binary dump as given by GNU od -b, look for long double - representation.""" - - # Read contains a list of 32 items, each item is a byte (in octal - # representation, as a string). We 'slide' over the output until read is of - # the form before_seq + content + after_sequence, where content is the long double - # representation: - # - content is 12 bytes: 80 bits Intel representation - # - content is 16 bytes: 80 bits Intel representation (64 bits) or quad precision - # - content is 8 bytes: same as double (not implemented yet) - read = [''] * 32 - saw = None - for line in lines: - # we skip the first word, as od -b output an index at the beginning of - # each line - for w in line.split()[1:]: - read.pop(0) - read.append(w) - - # If the end of read is equal to the after_sequence, read contains - # the long double - if read[-8:] == _AFTER_SEQ: - saw = copy.copy(read) - # if the content was 12 bytes, we only have 32 - 8 - 12 = 12 - # "before" bytes. In other words the first 4 "before" bytes went - # past the sliding window. - if read[:12] == _BEFORE_SEQ[4:]: - if read[12:-8] == _INTEL_EXTENDED_12B: - return 'INTEL_EXTENDED_12_BYTES_LE' - if read[12:-8] == _MOTOROLA_EXTENDED_12B: - return 'MOTOROLA_EXTENDED_12_BYTES_BE' - # if the content was 16 bytes, we are left with 32-8-16 = 16 - # "before" bytes, so 8 went past the sliding window. - elif read[:8] == _BEFORE_SEQ[8:]: - if read[8:-8] == _INTEL_EXTENDED_16B: - return 'INTEL_EXTENDED_16_BYTES_LE' - elif read[8:-8] == _IEEE_QUAD_PREC_BE: - return 'IEEE_QUAD_BE' - elif read[8:-8] == _IEEE_QUAD_PREC_LE: - return 'IEEE_QUAD_LE' - elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE: - return 'IBM_DOUBLE_DOUBLE_LE' - elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE: - return 'IBM_DOUBLE_DOUBLE_BE' - # if the content was 8 bytes, left with 32-8-8 = 16 bytes - elif read[:16] == _BEFORE_SEQ: - if read[16:-8] == _IEEE_DOUBLE_LE: - return 'IEEE_DOUBLE_LE' - elif read[16:-8] == _IEEE_DOUBLE_BE: - return 'IEEE_DOUBLE_BE' - - if saw is not None: - raise ValueError("Unrecognized format (%s)" % saw) - else: - # We never detected the after_sequence - raise ValueError("Could not lock sequences (%s)" % saw) - - -def check_for_right_shift_internal_compiler_error(cmd): - """ - On our arm CI, this fails with an internal compilation error - - The failure looks like the following, and can be reproduced on ARM64 GCC 5.4: - - <source>: In function 'right_shift': - <source>:4:20: internal compiler error: in expand_shift_1, at expmed.c:2349 - ip1[i] = ip1[i] >> in2; - ^ - Please submit a full bug report, - with preprocessed source if appropriate. - See <http://gcc.gnu.org/bugs.html> for instructions. - Compiler returned: 1 - - This function returns True if this compiler bug is present, and we need to - turn off optimization for the function - """ - cmd._check_compiler() - has_optimize = cmd.try_compile(textwrap.dedent("""\ - __attribute__((optimize("O3"))) void right_shift() {} - """), None, None) - if not has_optimize: - return False - - no_err = cmd.try_compile(textwrap.dedent("""\ - typedef long the_type; /* fails also for unsigned and long long */ - __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) { - for (int i = 0; i < n; i++) { - if (in2 < (the_type)sizeof(the_type) * 8) { - ip1[i] = ip1[i] >> in2; - } - } - } - """), None, None) - return not no_err diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 13f4fb972..fe5aa9374 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -571,7 +571,6 @@ NPY_NO_EXPORT void /**begin repeat1 * #isa = , _avx2# - * #ISA = , AVX2# * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)# * #ATTR = , NPY_GCC_TARGET_AVX2# */ |