diff options
author | hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4> | 2002-10-21 22:09:06 +0000 |
---|---|---|
committer | hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4> | 2002-10-21 22:09:06 +0000 |
commit | 4901c1b4f34955b9b6c8cba15eab6fce033c02a6 (patch) | |
tree | 16f0b324b292b656645c28993c41149e6ff56366 /gcc/config/i386/xmmintrin.h | |
parent | a08f958e28a4a392dada58cc8186f42bacdd15ce (diff) | |
download | gcc-4901c1b4f34955b9b6c8cba15eab6fce033c02a6.tar.gz |
* i386.c (builtin_description): Add punpcklqdq and movdq2q
(ix86_init_mmx_sse_builtins): Add v2di_ftype_void, di_ftype_v2di,
v16qi_ftype_pchar, void_ftype_pchar_v16qi, v4si_ftype_pchar,
void_ftype_pchar_v4si; Initialize __builtin_ia32_movdq2q,
__builtin_ia32_loaddqa, __builtin_ia32_loaddqu, __builtin_ia32_loadd
__builtin_ia32_storedqa, __builtin_ia32_storedqu, __builtin_ia32_stored
__builtin_ia32_setzero128.
(ix86_expand_builtin): Handle IX86_BUILTIN_CLRTI, IX86_BUILTIN_LOADDQA,
IX86_BUILTIN_LOADDQU, IX86_BUILTIN_LOADD, IX86_BUILTIN_STOREDQA,
IX86_BUILTIN_STOREDQU, IX86_BUILTIN_STORED, Ix86_BUILTIN_MOVQ.
* i386.h (ix86_builtins): Add IX86_BUILTIN_LOADDQA, IX86_BUILTIN_LOADDQU,
IX86_BUILTIN_STOREDQA, IX86_BUILTIN_STOREDQU, IX86_BUILTIN_LOADD,
IX86_BUILTIN_STORED, IX86_BUILTIN_CLRTI, IX86_BUILTIN_MOVDQ2Q,
IX86_BUILTIN_PUNPCKLQDQ128, Ix86_BUILTIN_MOVQ.
* i386.md (sse2_punpcklqdq, sse2_movqsse2_loadd, sse2_stored,
sse2_movq): New patterns.
(sse2_movdqa, sse2_movdqu, sse2_movdq2q): Fix.
* xmmintrin.h (_mm_load_si128, _mm_loadu_si128, _mm_loadl_epi64,
_mm_store_si128, _mm_storeu_si128, _mm_storel_epi64,
_mm_setzero_si128, _mm_set_epi64, _mm_set_epi32, _mm_set_epi16,
_mm_set_epi8, _mm_set1_epi64, _mm_set1_epi32, _mm_set1_epi16,
_mm_set1_epi8, _mm_setr_epi64, _mm_setr_epi32, _mm_setr_epi16,
_mm_setr_epi8, _mm_unpacklo_epi64,_mm_set_moveq): New functions.
(_mm_insert_epi16): Fix.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@58391 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/i386/xmmintrin.h')
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 248 |
1 files changed, 247 insertions, 1 deletions
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 8c5d41b660e..08ef76ad1a0 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -1581,6 +1581,246 @@ _mm_ucomineq_sd (__m128d __A, __m128d __B) return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); } +/* Create a vector with element 0 as *P and the rest zero. */ + +static __inline __m128i +_mm_load_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_loaddqa (__P); +} + +static __inline __m128i +_mm_loadu_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_loaddqu (__P); +} + +static __inline __m128i +_mm_loadl_epi64 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P); +} + +static __inline void +_mm_store_si128 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storedqa (__P, (__v16qi)__B); +} + +static __inline void +_mm_storeu_si128 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storedqu (__P, (__v16qi)__B); +} + +static __inline void +_mm_storel_epi64 (__m128i *__P, __m128i __B) +{ + *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B); +} + +static __inline __m128i +_mm_move_epi64 (__m128i __A) +{ + return (__m128i) __builtin_ia32_movq ((__v2di)__A); +} + +/* Create a vector of zeros. */ +static __inline __m128i +_mm_setzero_si128 (void) +{ + return (__m128i) __builtin_ia32_setzero128 (); +} + +static __inline __m128i +_mm_set_epi64 (__m64 __A, __m64 __B) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp); +} + +/* Create the vector [Z Y X W]. */ +static __inline __m128i +_mm_set_epi32 (int __Z, int __Y, int __X, int __W) +{ + union { + int __a[4]; + __m128i __v; + } __u; + + __u.__a[0] = __W; + __u.__a[1] = __X; + __u.__a[2] = __Y; + __u.__a[3] = __Z; + + return __u.__v; +} +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_set_epi16 (short __Z, short __Y, short __X, short __W, + short __V, short __U, short __T, short __S) +{ + union { + short __a[8]; + __m128i __v; + } __u; + + __u.__a[0] = __S; + __u.__a[1] = __T; + __u.__a[2] = __U; + __u.__a[3] = __V; + __u.__a[4] = __W; + __u.__a[5] = __X; + __u.__a[6] = __Y; + __u.__a[7] = __Z; + + return __u.__v; +} + +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_set_epi8 (char __Z, char __Y, char __X, char __W, + char __V, char __U, char __T, char __S, + char __Z1, char __Y1, char __X1, char __W1, + char __V1, char __U1, char __T1, char __S1) +{ + union { + char __a[16]; + __m128i __v; + } __u; + + __u.__a[0] = __S1; + __u.__a[1] = __T1; + __u.__a[2] = __U1; + __u.__a[3] = __V1; + __u.__a[4] = __W1; + __u.__a[5] = __X1; + __u.__a[6] = __Y1; + __u.__a[7] = __Z1; + __u.__a[8] = __S; + __u.__a[9] = __T; + __u.__a[10] = __U; + __u.__a[11] = __V; + __u.__a[12] = __W; + __u.__a[13] = __X; + __u.__a[14] = __Y; + __u.__a[15] = __Z; + + return __u.__v; +} + +static __inline __m128i +_mm_set1_epi64 (__m64 __A) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp); +} + +static __inline __m128i +_mm_set1_epi32 (int __A) +{ + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +static __inline __m128i +_mm_set1_epi16 (short __A) +{ + int __Acopy = (unsigned short)__A; + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); + __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +static __inline __m128i +_mm_set1_epi8 (char __A) +{ + int __Acopy = (unsigned char)__A; + __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy); + __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); + __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp); + return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); +} + +static __inline __m128i +_mm_setr_epi64 (__m64 __A, __m64 __B) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B); + return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2); +} + +/* Create the vector [Z Y X W]. */ +static __inline __m128i +_mm_setr_epi32 (int __W, int __X, int __Y, int __Z) +{ + union { + int __a[4]; + __m128i __v; + } __u; + + __u.__a[0] = __W; + __u.__a[1] = __X; + __u.__a[2] = __Y; + __u.__a[3] = __Z; + + return __u.__v; +} +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_setr_epi16 (short __S, short __T, short __U, short __V, + short __W, short __X, short __Y, short __Z) +{ + union { + short __a[8]; + __m128i __v; + } __u; + + __u.__a[0] = __S; + __u.__a[1] = __T; + __u.__a[2] = __U; + __u.__a[3] = __V; + __u.__a[4] = __W; + __u.__a[5] = __X; + __u.__a[6] = __Y; + __u.__a[7] = __Z; + + return __u.__v; +} + +/* Create the vector [S T U V Z Y X W]. */ +static __inline __m128i +_mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1, + char __W1, char __X1, char __Y1, char __Z1, + char __S, char __T, char __U, char __V, + char __W, char __X, char __Y, char __Z) +{ + union { + char __a[16]; + __m128i __v; + } __u; + + __u.__a[0] = __S1; + __u.__a[1] = __T1; + __u.__a[2] = __U1; + __u.__a[3] = __V1; + __u.__a[4] = __W1; + __u.__a[5] = __X1; + __u.__a[6] = __Y1; + __u.__a[7] = __Z1; + __u.__a[8] = __S; + __u.__a[9] = __T; + __u.__a[10] = __U; + __u.__a[11] = __V; + __u.__a[12] = __W; + __u.__a[13] = __X; + __u.__a[14] = __Y; + __u.__a[15] = __Z; + + return __u.__v; +} + static __inline __m128d _mm_cvtepi32_pd (__m128i __A) { @@ -1776,6 +2016,12 @@ _mm_unpacklo_epi32 (__m128i __A, __m128i __B) } static __inline __m128i +_mm_unpacklo_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); +} + +static __inline __m128i _mm_add_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); @@ -2075,7 +2321,7 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B) #define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)__A, __B) -#define _mm_insert_epi16 (__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)__A, __B, __C)) +#define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)__A, __B, __C)) static __inline __m128i _mm_max_epi16 (__m128i __A, __m128i __B) |