diff options
-rw-r--r-- | gcc/ChangeLog | 69 | ||||
-rw-r--r-- | gcc/config/i386/i386-modes.def | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 127 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 1 | ||||
-rw-r--r-- | gcc/config/i386/mmintrin.h | 37 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 101 | ||||
-rw-r--r-- | gcc/doc/extend.texi | 18 |
8 files changed, 212 insertions, 147 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e32764a93a4..1ffe9d59b98 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,72 @@ +2008-03-08 Uros Bizjak <ubizjak@gmail.com> + + PR target/22152 + * config/i386/i386-modes.def (V1DI): New vector mode. + * config/i386/i386.h (VALID_MMX_REG_MODE): Add V1DImode. + * config/i386/mmx.md (MMXMODEI8): New mode iterator. + (MMXMODE248): Ditto. + (MMXMODE): Add V1DI mode. + (mmxvecsize): Change DI mode to V1DI mode. + ("mov<mode>"): Use MMXMODEI8 mode iterator. + ("*mov<mode>_internal_rex64"): Ditto. + ("*mov<mode>_internal"): Ditto. + ("mmx_add<mode>3"): Ditto. Handle V1DImode for TARGET_SSE2. + ("mmx_sub<mode>3"): Ditto. + ("mmx_adddi3"): Remove insn pattern. + ("mmx_subdi3"): Ditto. + ("mmx_ashr<mode>3"): Use SImode and "yN" constraint for operand 2. + ("mmx_lshr<mode>3"): Ditto. Use MMXMODE248 mode iterator. + ("mmx_ashl<mode>3"): Ditto. + ("mmx_lshrdi3"): Remove insn pattern. + ("mmx_ashldi3"): Ditto. + * config/i386/i386.c (classify_argument): Handle V1DImode. + (function_arg_advance_32): Ditto. + (function_arg_32): Ditto. + (struct builtin_description) [IX86_BUILTIN_PADDQ]: Use + mmx_addv1di3 insn pattern. + [IX86_BUILTIN_PSUBQ]: Use mmx_subv1di3 insn pattern. + [IX86_BUILTIN_PSLL?, IX86_BUILTIN_PSRL?, IX86_BUILTIN_PSRA?, + IX86_BUILTIN_PSLL?I, IX86_BUILTIN_PSRL?I, IX86_BUILTIN_PSRA?I, + IX86_BUILTIN_PSLL?I128, IX86_BUILTIN_PSRL?I128, IX86_BUILTIN_PSRA?I128]: + Remove definitions of built-in functions. + (V1DI_type_node): New node. + (v1di_ftype_v1di_int): Ditto. + (v1di_ftype_v1di_v1di): Ditto. + (v2si_ftype_v2si_int): Ditto. + (v4hi_ftype_v4hi_di): Remove node. + (v2si_ftype_v2si_di): Ditto. + (ix86_init_mmx_sse_builtins): Handle V1DImode. + (__builtin_ia32_psll?, __builtin_ia32_psrl?, __builtin_ia32_psra?): + Redefine builtins using def_builtin_const with *_ftype_*_int node. + (__builtin_ia32_psll?i, __builtin_ia32_psrl?i, __builtin_ia32_psra?i): + Add new builtins using def_builtin_const. 
+ (ix86_expand_builtin) [IX86_BUILTIN_PSLL?, IX86_BUILTIN_PSRL?, + IX86_BUILTIN_PSRA?, IX86_BUILTIN_PSLL?I, IX86_BUILTIN_PSRL?I, + IX86_BUILTIN_PSRA?I]: Handle builtin definitions. + * config/i386/mmintrin.h (__v1di): New typedef. + (_mm_add_si64): Cast arguments to __v1di type. + (_mm_sub_si64): Ditto. + (_mm_sll_pi16): Cast __count to __v4hi type. + (_mm_sll_pi32): Cast __count to __v2si type. + (_mm_sll_si64): Cast arguments to __v1di type. + (_mm_srl_pi16): Cast __count to __v4hi type. + (_mm_srl_pi32): Cast __count to __v2si type. + (_mm_srl_si64): Cast arguments to __v1di type. + (_mm_sra_pi16): Cast __count to __v4hi type. + (_mm_sra_pi32): Cast __count to __v2si type. + (_mm_slli_pi16): Use __builtin_ia32_psllwi. + (_mm_slli_pi32): Use __builtin_ia32_pslldi. + (_mm_slli_si64): Use __builtin_ia32_psllqi. Cast __m to __v1di type. + (_mm_srli_pi16): Use __builtin_ia32_psrlwi. + (_mm_srli_pi32): Use __builtin_ia32_psrldi. + (_mm_srli_si64): Use __builtin_ia32_psrlqi. Cast __m to __v1di type. + (_mm_srai_pi16): Use __builtin_ia32_psrawi. + (_mm_srai_pi32): Use __builtin_ia32_psradi. + * config/i386/i386.md (UNSPEC_NOP): Remove unspec definition. + * doc/extend.texi (X86 Built-in Functions) [__builtin_ia32_psll?, + __builtin_ia32_psrl?, __builtin_ia32_psra?, __builtin_ia32_psll?i, + __builtin_ia32_psrl?i, __builtin_ia32_psra?i]: Add new builtins. + 2008-03-07 Joseph Myers <joseph@codesourcery.com> * doc/include/texinfo.tex: Update to version 2008-03-07.10. 
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index f2f2b4f2acb..a23e8203ff3 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -79,6 +79,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ +VECTOR_MODE (INT, DI, 1); /* V1DI */ VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODE (INT, DI, 4); /* V4DI */ VECTOR_MODE (INT, SI, 8); /* V8SI */ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 844bb756d94..f1618e617ba 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3838,7 +3838,7 @@ classify_argument (enum machine_mode mode, const_tree type, } /* for V1xx modes, just use the base mode */ - if (VECTOR_MODE_P (mode) + if (VECTOR_MODE_P (mode) && mode != V1DImode && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) mode = GET_MODE_INNER (mode); @@ -3910,6 +3910,7 @@ classify_argument (enum machine_mode mode, const_tree type, classes[0] = X86_64_SSE_CLASS; classes[1] = X86_64_SSEUP_CLASS; return 2; + case V1DImode: case V2SFmode: case V2SImode: case V4HImode: @@ -4211,6 +4212,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, case V4HImode: case V2SImode: case V2SFmode: + case V1DImode: if (!type || !AGGREGATE_TYPE_P (type)) { cum->mmx_words += words; @@ -4374,6 +4376,7 @@ function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, case V4HImode: case V2SImode: case V2SFmode: + case V1DImode: if (!type || !AGGREGATE_TYPE_P (type)) { if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) @@ -17955,11 +17958,11 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, 
"__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 }, @@ -18010,25 +18013,6 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 }, - { 
OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 }, @@ -18140,17 +18124,6 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, 
IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 }, @@ -18212,6 +18185,7 @@ static const struct builtin_description bdesc_2arg[] = static const struct builtin_description bdesc_1arg[] = { + /* SSE */ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 }, @@ -18228,6 +18202,7 @@ static const struct builtin_description bdesc_1arg[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 }, + /* SSE2 */ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 }, @@ -18585,6 +18560,8 @@ ix86_init_mmx_sse_builtins (void) tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + tree V1DI_type_node + = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); tree V2DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); @@ -18649,14 +18626,13 @@ ix86_init_mmx_sse_builtins (void) tree v4hi_ftype_v4hi_int = build_function_type_list (V4HI_type_node, V4HI_type_node, integer_type_node, NULL_TREE); - tree v4hi_ftype_v4hi_di - = build_function_type_list (V4HI_type_node, - V4HI_type_node, long_long_unsigned_type_node, - NULL_TREE); - tree v2si_ftype_v2si_di + tree v2si_ftype_v2si_int = build_function_type_list (V2SI_type_node, - V2SI_type_node, long_long_unsigned_type_node, - NULL_TREE); + V2SI_type_node, integer_type_node, NULL_TREE); + tree v1di_ftype_v1di_int + = build_function_type_list (V1DI_type_node, + V1DI_type_node, integer_type_node, NULL_TREE); + tree void_ftype_void = 
build_function_type (void_type_node, void_list_node); tree void_ftype_unsigned @@ -18723,10 +18699,9 @@ ix86_init_mmx_sse_builtins (void) tree v2si_ftype_v2si_v2si = build_function_type_list (V2SI_type_node, V2SI_type_node, V2SI_type_node, NULL_TREE); - tree di_ftype_di_di - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, NULL_TREE); + tree v1di_ftype_v1di_v1di + = build_function_type_list (V1DI_type_node, + V1DI_type_node, V1DI_type_node, NULL_TREE); tree di_ftype_di_di_int = build_function_type_list (long_long_unsigned_type_node, @@ -19182,8 +19157,8 @@ ix86_init_mmx_sse_builtins (void) case V2SImode: type = v2si_ftype_v2si_v2si; break; - case DImode: - type = di_ftype_di_di; + case V1DImode: + type = v1di_ftype_v1di_v1di; break; default: @@ -19275,16 +19250,25 @@ ix86_init_mmx_sse_builtins (void) /* Add the remaining MMX insns with somewhat more complicated types. */ def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); - - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, 
IX86_BUILTIN_PSLLWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD); def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); @@ -20829,6 +20813,39 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget 
ATTRIBUTE_UNUSED, emit_insn (pat); return target; + case IX86_BUILTIN_PSLLW: + case IX86_BUILTIN_PSLLWI: + icode = CODE_FOR_mmx_ashlv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSLLD: + case IX86_BUILTIN_PSLLDI: + icode = CODE_FOR_mmx_ashlv2si3; + goto do_pshift; + case IX86_BUILTIN_PSLLQ: + case IX86_BUILTIN_PSLLQI: + icode = CODE_FOR_mmx_ashlv1di3; + goto do_pshift; + case IX86_BUILTIN_PSRAW: + case IX86_BUILTIN_PSRAWI: + icode = CODE_FOR_mmx_ashrv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSRAD: + case IX86_BUILTIN_PSRADI: + icode = CODE_FOR_mmx_ashrv2si3; + goto do_pshift; + case IX86_BUILTIN_PSRLW: + case IX86_BUILTIN_PSRLWI: + icode = CODE_FOR_mmx_lshrv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSRLD: + case IX86_BUILTIN_PSRLDI: + icode = CODE_FOR_mmx_lshrv2si3; + goto do_pshift; + case IX86_BUILTIN_PSRLQ: + case IX86_BUILTIN_PSRLQI: + icode = CODE_FOR_mmx_lshrv1di3; + goto do_pshift; + case IX86_BUILTIN_PSLLW128: case IX86_BUILTIN_PSLLWI128: icode = CODE_FOR_ashlv8hi3; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f2429846691..98cb72ac921 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1120,8 +1120,9 @@ do { \ ((MODE) == V2SFmode || (MODE) == SFmode) #define VALID_MMX_REG_MODE(MODE) \ - ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \ - || (MODE) == V2SImode || (MODE) == SImode) + ((MODE) == V1DImode || (MODE) == DImode \ + || (MODE) == V2SImode || (MODE) == SImode \ + || (MODE) == V4HImode || (MODE) == V8QImode) /* ??? No autovectorization into MMX or 3DNOW until we can reliably place emms and femms instructions. 
*/ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d9b625cb295..eb942d60d9c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -95,7 +95,6 @@ (UNSPEC_RCP 45) (UNSPEC_RSQRT 46) (UNSPEC_SFENCE 47) - (UNSPEC_NOP 48) ; prevents combiner cleverness (UNSPEC_PFRCP 49) (UNSPEC_PFRCPIT1 40) (UNSPEC_PFRCPIT2 41) diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index a04109be616..94800ad688e 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -42,6 +42,7 @@ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); typedef int __v2si __attribute__ ((__vector_size__ (8))); typedef short __v4hi __attribute__ ((__vector_size__ (8))); typedef char __v8qi __attribute__ ((__vector_size__ (8))); +typedef long long __v1di __attribute__ ((__vector_size__ (8))); /* Empty the multimedia state. */ static __inline void __attribute__((__always_inline__, __artificial__)) @@ -309,7 +310,7 @@ _m_paddd (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_add_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); + return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2); } #endif @@ -413,7 +414,7 @@ _m_psubd (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sub_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); + return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2); } #endif @@ -520,7 +521,7 @@ _m_pmullw (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -532,7 +533,7 @@ 
_m_psllw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -545,7 +546,7 @@ _m_psllwi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -557,7 +558,7 @@ _m_pslld (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -570,7 +571,7 @@ _m_pslldi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_si64 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -582,7 +583,7 @@ _m_psllq (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -595,7 +596,7 @@ _m_psllqi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, 
__artificial__)) _mm_sra_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -607,7 +608,7 @@ _m_psraw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srai_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -620,7 +621,7 @@ _m_psrawi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sra_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -632,7 +633,7 @@ _m_psrad (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srai_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -645,7 +646,7 @@ _m_psradi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -657,7 +658,7 @@ _m_psrlw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_pi16 (__m64 __m, int __count) { - return (__m64) 
__builtin_ia32_psrlw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -670,7 +671,7 @@ _m_psrlwi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -682,7 +683,7 @@ _m_psrld (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -695,7 +696,7 @@ _m_psrldi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_si64 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -707,7 +708,7 @@ _m_psrlq (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3371161f82f..1d2a41dd114 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -32,16 +32,18 @@ ;; 8 byte integral modes handled by MMX (and by extension, SSE) (define_mode_iterator MMXMODEI [V8QI V4HI 
V2SI]) +(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI]) ;; All 8-byte vector modes handled by MMX -(define_mode_iterator MMXMODE [V8QI V4HI V2SI V2SF]) +(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF]) ;; Mix-n-match (define_mode_iterator MMXMODE12 [V8QI V4HI]) (define_mode_iterator MMXMODE24 [V4HI V2SI]) +(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) ;; Mapping from integer vector mode to mnemonic suffix -(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (DI "q")]) +(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -53,8 +55,8 @@ ;; This is essential for maintaining stable calling conventions. (define_expand "mov<mode>" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") - (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "") + (match_operand:MMXMODEI8 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (<MODE>mode, operands); @@ -62,9 +64,9 @@ }) (define_insn "*mov<mode>_internal_rex64" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x") - (match_operand:MMXMODEI 1 "vector_move_operand" + (match_operand:MMXMODEI8 1 "vector_move_operand" "Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))] "TARGET_64BIT && TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -86,9 +88,9 @@ (set_attr "mode" "DI")]) (define_insn "*mov<mode>_internal" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m") - (match_operand:MMXMODEI 1 "vector_move_operand" + (match_operand:MMXMODEI8 1 "vector_move_operand" "C ,!ym,!y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -557,26 +559,16 @@ 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "mmx_add<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (plus:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y") + (plus:MMXMODEI8 + (match_operand:MMXMODEI8 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))] + "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)) + && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" "padd<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) -(define_insn "mmx_adddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, DImode, operands)" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - (define_insn "mmx_ssadd<mode>3" [(set (match_operand:MMXMODE12 0 "register_operand" "=y") (ss_plus:MMXMODE12 @@ -598,26 +590,15 @@ (set_attr "mode" "DI")]) (define_insn "mmx_sub<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (minus:MMXMODEI - (match_operand:MMXMODEI 1 "register_operand" "0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y") + (minus:MMXMODEI8 + (match_operand:MMXMODEI8 1 "register_operand" "0") + (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))] + "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))" "psub<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) -(define_insn "mmx_subdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(minus:DI 
(match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_SSE2" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - (define_insn "mmx_sssub<mode>3" [(set (match_operand:MMXMODE12 0 "register_operand" "=y") (ss_minus:MMXMODE12 @@ -778,54 +759,32 @@ [(set (match_operand:MMXMODE24 0 "register_operand" "=y") (ashiftrt:MMXMODE24 (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psra<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) (define_insn "mmx_lshr<mode>3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y") - (lshiftrt:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (lshiftrt:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psrl<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_lshrdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - (define_insn "mmx_ashl<mode>3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y") - (ashift:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (ashift:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psll<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") 
(set_attr "mode" "DI")]) -(define_insn "mmx_ashldi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral comparisons diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 3d543073f4c..9dbdc8ea947 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7476,6 +7476,24 @@ v2si __builtin_ia32_punpckldq (v2si, v2si) v8qi __builtin_ia32_packsswb (v4hi, v4hi) v4hi __builtin_ia32_packssdw (v2si, v2si) v8qi __builtin_ia32_packuswb (v4hi, v4hi) + +v4hi __builtin_ia32_psllw (v4hi, v4hi) +v2si __builtin_ia32_pslld (v2si, v2si) +v1di __builtin_ia32_psllq (v1di, v1di) +v4hi __builtin_ia32_psrlw (v4hi, v4hi) +v2si __builtin_ia32_psrld (v2si, v2si) +v1di __builtin_ia32_psrlq (v1di, v1di) +v4hi __builtin_ia32_psraw (v4hi, v4hi) +v2si __builtin_ia32_psrad (v2si, v2si) +v4hi __builtin_ia32_psllwi (v4hi, int) +v2si __builtin_ia32_pslldi (v2si, int) +v1di __builtin_ia32_psllqi (v1di, int) +v4hi __builtin_ia32_psrlwi (v4hi, int) +v2si __builtin_ia32_psrldi (v2si, int) +v1di __builtin_ia32_psrlqi (v1di, int) +v4hi __builtin_ia32_psrawi (v4hi, int) +v2si __builtin_ia32_psradi (v2si, int) + @end smallexample The following built-in functions are made available either with |