diff options
author | Hongtao Liu <liuhongt@gcc.gnu.org> | 2019-05-08 10:21:40 +0000 |
---|---|---|
committer | Hongtao Liu <liuhongt@gcc.gnu.org> | 2019-05-08 10:21:40 +0000 |
commit | 4f0e90fae97a894247ec93336c8826cf4afb3d0d (patch) | |
tree | 0cfa0ff6eb03bc72ee6f2320052fe38d3e8501e6 /gcc/config/i386/avx512bf16intrin.h | |
parent | da2d30c199a6b6866593c20dbd84673c1637be89 (diff) | |
download | gcc-4f0e90fae97a894247ec93336c8826cf4afb3d0d.tar.gz |
Enable support for bfloat16, which will be available on the future Cooper Lake processors.
AVX512BF16 provides three instructions, VCVTNE2PS2BF16, VCVTNEPS2BF16 and
VDPBF16PS, which are Vector Neural Network Instructions
supporting:
- VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data.
- VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data.
- VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
2019-05-07 Wei Xiao <wei3.xiao@intel.com>
* common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET,
OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New.
(OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET.
(ix86_handle_option): Handle -mavx512bf16.
* config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h
to extra_headers.
* config/i386/avx512bf16vlintrin.h: New.
* config/i386/avx512bf16intrin.h: New.
* config/i386/cpuid.h (bit_AVX512BF16): New.
* config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16.
* config/i386/i386-builtin-types.def: Add new types.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.c (ix86_target_macros_internal): Define
__AVX512BF16__.
* config/i386/i386-option.c (ix86_target_string): Add -mavx512bf16.
(ix86_option_override_internal): Handle BF16.
(ix86_valid_target_attribute_inner_p): Ditto.
* config/i386/i386-expand.c (ix86_expand_args_builtin): Ditto.
* config/i386/i386-builtin.c (enum processor_features): Add
F_AVX512BF16.
(static const _isa_names_table isa_names_table): Ditto.
* config/i386/i386.h (TARGET_AVX512BF16, TARGET_AVX512BF16_P): New.
(PTA_AVX512BF16): Ditto.
* config/i386/i386.opt: Add -mavx512bf16.
* config/i386/immintrin.h: Include avx512bf16intrin.h
and avx512bf16vlintrin.h.
* config/i386/sse.md (avx512f_cvtne2ps2bf16_<mode><mask_name>,
avx512f_cvtneps2bf16_<mode><mask_name>,
avx512f_dpbf16ps_<mode><mask_half_name>): New define_insn patterns.
* config/i386/subst.md (mask_half): Add new subst.
* doc/invoke.texi: Document -mavx512bf16.
2019-05-07 Wei Xiao <wei3.xiao@intel.com>
* gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16-vdpbf16ps-1.c: New test.
* gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c: New test.
* gcc.target/i386/builtin_target.c: Handle avx512bf16.
* gcc.target/i386/sse-12.c: Add -mavx512bf16.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* g++.dg/other/i386-2.C: Ditto.
* g++.dg/other/i386-3.C: Ditto.
2019-05-07 Hongtao Liu <hongtao.liu@intel.com>
* config/i386/cpuinfo.c (get_available_features): Detect BF16.
* config/i386/cpuinfo.h (enum processor_features): Add
FEATURE_AVX512BF16.
From-SVN: r271006
Diffstat (limited to 'gcc/config/i386/avx512bf16intrin.h')
-rw-r--r-- | gcc/config/i386/avx512bf16intrin.h | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
new file mode 100644
index 00000000000..cc983bdf590
--- /dev/null
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -0,0 +1,118 @@
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BF16INTRIN_H_INCLUDED
+#define _AVX512BF16INTRIN_H_INCLUDED
+
+#ifndef __AVX512BF16__
+#pragma GCC push_options
+#pragma GCC target("avx512bf16")
+#define __DISABLE_AVX512BF16__
+#endif /* __AVX512BF16__ */
+
+/* Internal data type for implementing the intrinsics: 64-byte vector of short. */
+typedef short __v32bh __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+   vector types, and their scalar components. */
+typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+
+/* vcvtne2ps2bf16: convert two packed single vectors to one packed BF16 vector. */
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
+{
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
+{
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
+{
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
+}
+
+/* vcvtneps2bf16: convert one packed single vector to a packed BF16 vector. */
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtneps_pbh (__m512 __A)
+{
+  return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
+{
+  return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
+{
+  return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
+}
+
+/* vdpbf16ps: dot product of BF16 pairs accumulated into packed single. */
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
+{
+  return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
+{
+  return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
+{
+  return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
+}
+
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16__ */
+
+#endif /* _AVX512BF16INTRIN_H_INCLUDED */ |