diff options
author | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-03-13 09:49:43 +0000 |
---|---|---|
committer | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-03-13 09:49:43 +0000 |
commit | d3f00eb2cf4bc577869563a0db1b44f3e6b8765a (patch) | |
tree | 00e1a4f280957573f0753c2ca1663b6b508d2af3 /gcc/config/i386 | |
parent | 20c1b226452a83809bbf5c113b3ecb520a850785 (diff) | |
download | gcc-d3f00eb2cf4bc577869563a0db1b44f3e6b8765a.tar.gz |
PR target/35553
* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define
__SSE_USE_INLINED_FUNC__ when optimizing, unless
-fkeep-inline-functions is in effect.
* config/i386/xmmintrin.h: Use __SSE_USE_INLINED_FUNC__ instead of
__OPTIMIZE__ to choose between the inlined intrinsic SSE functions that
take immediate arguments and their equivalent macro definitions.
* config/i386/bmmintrin.h: Ditto.
* config/i386/smmintrin.h: Ditto.
* config/i386/tmmintrin.h: Ditto.
* config/i386/mmintrin-common.h: Ditto.
* config/i386/ammintrin.h: Ditto.
* config/i386/emmintrin.h: Ditto.
testsuite/ChangeLog:
PR target/35553
* g++.dg/other/i386-3.C: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@133164 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/ammintrin.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/bmmintrin.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/emmintrin.h | 8 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/mmintrin-common.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/smmintrin.h | 22 | ||||
-rw-r--r-- | gcc/config/i386/tmmintrin.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 10 |
8 files changed, 31 insertions, 26 deletions
diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h index 1351ebd9155..69ee2468357 100644 --- a/gcc/config/i386/ammintrin.h +++ b/gcc/config/i386/ammintrin.h @@ -55,7 +55,7 @@ _mm_extract_si64 (__m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y); } -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L) { @@ -73,7 +73,7 @@ _mm_insert_si64 (__m128i __X,__m128i __Y) return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y); } -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L) { diff --git a/gcc/config/i386/bmmintrin.h b/gcc/config/i386/bmmintrin.h index 4254d0fc17c..72f0efd0b9d 100644 --- a/gcc/config/i386/bmmintrin.h +++ b/gcc/config/i386/bmmintrin.h @@ -350,7 +350,7 @@ _mm_rot_epi64(__m128i __A, __m128i __B) /* Rotates - Immediate form */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_roti_epi8(__m128i __A, const int __B) { diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index c5bbe8bf82a..1244958688c 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -880,7 +880,7 @@ _mm_cvtss_sd (__m128d __A, __m128 __B) return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); } -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128d __attribute__((__always_inline__, __artificial__)) _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) { @@ -1144,7 +1144,7 @@ _mm_srai_epi32 (__m128i __A, int __B) return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); } -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static 
__inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_srli_si128 (__m128i __A, const int __N) { @@ -1307,7 +1307,7 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); } -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline int __attribute__((__always_inline__, __artificial__)) _mm_extract_epi16 (__m128i const __A, int const __N) { @@ -1363,7 +1363,7 @@ _mm_mulhi_epu16 (__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); } -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_shufflehi_epi16 (__m128i __A, const int __mask) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 98cb72ac921..ea0a8204118 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -691,6 +691,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__SSE_MATH__"); \ if (TARGET_SSE_MATH && TARGET_SSE2) \ builtin_define ("__SSE2_MATH__"); \ + \ + /* Use inlined intrinsic SSE function having immediate \ + arguments instead of a macro definition. */ \ + if (optimize && !flag_keep_inline_functions) \ + builtin_define ("__SSE_USE_INLINED_FUNC__"); \ } \ while (0) diff --git a/gcc/config/i386/mmintrin-common.h b/gcc/config/i386/mmintrin-common.h index 3be83335f29..308c2c288cc 100644 --- a/gcc/config/i386/mmintrin-common.h +++ b/gcc/config/i386/mmintrin-common.h @@ -92,7 +92,7 @@ _mm_testnzc_si128 (__m128i __M, __m128i __V) /* Packed/scalar double precision floating point rounding. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128d __attribute__((__always_inline__, __artificial__)) _mm_round_pd (__m128d __V, const int __M) { @@ -117,7 +117,7 @@ _mm_round_sd(__m128d __D, __m128d __V, const int __M) /* Packed/scalar single precision floating point rounding. 
*/ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128 __attribute__((__always_inline__, __artificial__)) _mm_round_ps (__m128 __V, const int __M) { diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 08bfd107232..bb70cb49a54 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -44,7 +44,7 @@ /* Integer blend instructions - select data from 2 sources using constant/variable mask. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M) { @@ -69,7 +69,7 @@ _mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M) /* Single precision floating point blend instructions - select data from 2 sources using constant/variable mask. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128 __attribute__((__always_inline__, __artificial__)) _mm_blend_ps (__m128 __X, __m128 __Y, const int __M) { @@ -94,7 +94,7 @@ _mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M) /* Double precision floating point blend instructions - select data from 2 sources using constant/variable mask. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128d __attribute__((__always_inline__, __artificial__)) _mm_blend_pd (__m128d __X, __m128d __Y, const int __M) { @@ -119,7 +119,7 @@ _mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M) /* Dot product instructions with mask-defined summing and zeroing parts of result. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128 __attribute__((__always_inline__, __artificial__)) _mm_dp_ps (__m128 __X, __m128 __Y, const int __M) { @@ -224,7 +224,7 @@ _mm_mul_epi32 (__m128i __X, __m128i __Y) index, the bits [5-4] define D index, and bits [3-0] define zeroing mask for D. 
*/ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128 __attribute__((__always_inline__, __artificial__)) _mm_insert_ps (__m128 __D, __m128 __S, const int __N) { @@ -244,7 +244,7 @@ _mm_insert_ps (__m128 __D, __m128 __S, const int __N) /* Extract binary representation of single precision float from packed single precision array element of X selected by index N. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline int __attribute__((__always_inline__, __artificial__)) _mm_extract_ps (__m128 __X, const int __N) { @@ -277,7 +277,7 @@ _mm_extract_ps (__m128 __X, const int __N) /* Insert integer, S, into packed integer array element of D selected by index N. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_insert_epi8 (__m128i __D, int __S, const int __N) { @@ -319,7 +319,7 @@ _mm_insert_epi64 (__m128i __D, long long __S, const int __N) /* Extract integer from packed integer array element of X selected by index N. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline int __attribute__((__always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { @@ -447,7 +447,7 @@ _mm_packus_epi32 (__m128i __X, __m128i __Y) byte integers in the first 2 operands. Starting offsets within operands are determined by the 3rd mask operand. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M) { @@ -497,7 +497,7 @@ _mm_stream_load_si128 (__m128i *__X) /* Intrinsics for text/string processing. 
*/ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_cmpistrm (__m128i __X, __m128i __Y, const int __M) { @@ -550,7 +550,7 @@ _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* Intrinsics for text/string processing and reading values of EFlags. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline int __attribute__((__always_inline__, __artificial__)) _mm_cmpistra (__m128i __X, __m128i __Y, const int __M) { diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h index 0e6a0d49e6f..6fd20b1974c 100644 --- a/gcc/config/i386/tmmintrin.h +++ b/gcc/config/i386/tmmintrin.h @@ -181,7 +181,7 @@ _mm_sign_pi32 (__m64 __X, __m64 __Y) return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); } -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N) { diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index cfe55ef28e8..e0cf958f2bb 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -716,7 +716,7 @@ _mm_cvtps_pi8(__m128 __A) } /* Selects four specific SPFP values from A and B based on MASK. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m128 __attribute__((__always_inline__, __artificial__)) _mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) { @@ -992,7 +992,7 @@ _mm_move_ss (__m128 __A, __m128 __B) } /* Extracts one of the four words of A. The selector N must be immediate. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline int __attribute__((__always_inline__, __artificial__)) _mm_extract_pi16 (__m64 const __A, int const __N) { @@ -1013,7 +1013,7 @@ _m_pextrw (__m64 const __A, int const __N) /* Inserts word D into one of four words of A. The selector N must be immediate. 
*/ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_insert_pi16 (__m64 const __A, int const __D, int const __N) { @@ -1114,7 +1114,7 @@ _m_pmulhuw (__m64 __A, __m64 __B) /* Return a combination of the four 16-bit values in A. The selector must be an immediate. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_shuffle_pi16 (__m64 __A, int const __N) { @@ -1191,7 +1191,7 @@ _m_psadbw (__m64 __A, __m64 __B) /* Loads one cache line from address P to a location "closer" to the processor. The selector I specifies the type of prefetch operation. */ -#ifdef __OPTIMIZE__ +#ifdef __SSE_USE_INLINED_FUNC__ static __inline void __attribute__((__always_inline__, __artificial__)) _mm_prefetch (const void *__P, enum _mm_hint __I) { |