summaryrefslogtreecommitdiff
path: root/gcc/config/i386/avx512bf16intrin.h
diff options
context:
space:
mode:
authorHongtao Liu <liuhongt@gcc.gnu.org>2019-05-08 10:21:40 +0000
committerHongtao Liu <liuhongt@gcc.gnu.org>2019-05-08 10:21:40 +0000
commit4f0e90fae97a894247ec93336c8826cf4afb3d0d (patch)
tree0cfa0ff6eb03bc72ee6f2320052fe38d3e8501e6 /gcc/config/i386/avx512bf16intrin.h
parentda2d30c199a6b6866593c20dbd84673c1637be89 (diff)
downloadgcc-4f0e90fae97a894247ec93336c8826cf4afb3d0d.tar.gz
Enable support for bfloat16 which will be in Future Cooper Lake.
There are 3 instructions for AVX512BF16: VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting: - VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data. - VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data. - VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision. 2019-05-07 Wei Xiao <wei3.xiao@intel.com> * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New. (OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET. (ix86_handle_option): Handle -mavx512bf16. * config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h to extra_headers. * config/i386/avx512bf16vlintrin.h: New. * config/i386/avx512bf16intrin.h: New. * config/i386/cpuid.h (bit_AVX512BF16): New. * config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16. * config/i386/i386-builtin-types.def: Add new types. * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVX512BF16__. * config/i386/i386-option.c (ix86_target_string): Add -mavx512bf16. (ix86_option_override_internal): Handle BF16. (ix86_valid_target_attribute_inner_p): Ditto. * config/i386/i386-expand.c (ix86_expand_args_builtin): Ditto. * config/i386/i386-builtin.c (enum processor_features): Add F_AVX512BF16. (static const _isa_names_table isa_names_table): Ditto. * config/i386/i386.h (TARGET_AVX512BF16, TARGET_AVX512BF16_P): New. (PTA_AVX512BF16): Ditto. * config/i386/i386.opt: Add -mavx512bf16. * config/i386/immintrin.h: Include avx512bf16intrin.h and avx512bf16vlintrin.h. * config/i386/sse.md (avx512f_cvtne2ps2bf16_<mode><mask_name>, avx512f_cvtneps2bf16_<mode><mask_name>, avx512f_dpbf16ps_<mode><mask_half_name>): New define_insn patterns. * config/i386/subst.md (mask_half): Add new subst. * doc/invoke.texi: Document -mavx512bf16. 2019-05-07 Wei Xiao <wei3.xiao@intel.com> * gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c: New test. * gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c: New test. * gcc.target/i386/avx512bf16-vdpbf16ps-1.c: New test. * gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c: New test. * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: New test. * gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c: New test. * gcc.target/i386/builtin_target.c: Handle avx512bf16. * gcc.target/i386/sse-12.c: Add -mavx512bf16. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. 2019-05-07 Hongtao Liu <hongtao.liu@intel.com> * config/i386/cpuinfo.c (get_available_features): Detect BF16. * config/i386/cpuinfo.h (enum processor_features): Add FEATURE_AVX512BF16. From-SVN: r271006
Diffstat (limited to 'gcc/config/i386/avx512bf16intrin.h')
-rw-r--r--gcc/config/i386/avx512bf16intrin.h118
1 files changed, 118 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
new file mode 100644
index 00000000000..cc983bdf590
--- /dev/null
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -0,0 +1,118 @@
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BF16INTRIN_H_INCLUDED
+#define _AVX512BF16INTRIN_H_INCLUDED
+
+#ifndef __AVX512BF16__
+#pragma GCC push_options
+#pragma GCC target("avx512bf16")
+#define __DISABLE_AVX512BF16__
+#endif /* __AVX512BF16__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v32bh __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+
+/* vcvtne2ps2bf16 */
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
+}
+
+/* vcvtneps2bf16 */
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtneps_pbh (__m512 __A)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
+}
+
+/* vdpbf16ps */
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
+}
+
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16__ */
+
+#endif /* _AVX512BF16INTRIN_H_INCLUDED */