author:    bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4>  2001-09-28 18:00:35 +0000
committer: bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4>  2001-09-28 18:00:35 +0000
commit:    d5e0afe2f0803922958372c05b69db0512caf7bb (patch)
tree:      ed3a0fd403af3666e62d6260eb780d02efdfc593 /gcc
parent:    9532742b71aeff799d11854814948bd038465856 (diff)
Add support for 3Dnow builtins
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@45863 138bc75d-0d04-0410-961f-82ee72b054a4
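The patch also teaches `type_for_mode` and the common tree nodes about V2SFmode, which is what lets the C front end materialize a two-float vector type for the new builtins. Below is a minimal usage sketch, not part of the patch: it assumes `-m3dnow` and mode-attribute vector typedefs in the style of that era's intrinsics headers; the `__v2sf`/`__v2si` names and the helper function are illustrative only.

```c
/* Illustrative sketch only -- compile with -m3dnow on a compiler that
   carries this patch.  The typedef names are assumptions, not anything
   the patch itself defines.  */
typedef float __v2sf __attribute__ ((mode (V2SF)));
typedef int   __v2si __attribute__ ((mode (V2SI)));

static void
scaled_to_int (__v2sf a, __v2sf b, __v2si *out)
{
  __v2sf prod = __builtin_ia32_pfmul (a, b);  /* PFMUL: packed single-precision multiply */
  *out = __builtin_ia32_pf2id (prod);         /* PF2ID: truncate to packed 32-bit ints   */
  __builtin_ia32_femms ();                    /* FEMMS: clear MMX state before x87 code  */
}
```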
Diffstat (limited to 'gcc')
-rw-r--r--   gcc/ChangeLog            |  31
-rw-r--r--   gcc/c-common.c           |   2
-rw-r--r--   gcc/config/i386/i386.c   | 216
-rw-r--r--   gcc/config/i386/i386.h   |  54
-rw-r--r--   gcc/config/i386/i386.md  | 381
-rw-r--r--   gcc/tree.c               |   5
-rw-r--r--   gcc/tree.h               |   2
7 files changed, 648 insertions, 43 deletions
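Note that the new -m3dnow / -m3dnow-a masks only control which builtins the compiler exposes; they do not check that the processor running the program actually implements 3DNow!. Code using these builtins therefore usually pairs them with a CPUID test. A hedged sketch follows (not part of the patch); the bit positions follow AMD's CPUID documentation, extended function 0x80000001, EDX bit 31 for 3DNow! and bit 30 for the Athlon extensions.

```c
/* Runtime-detection sketch.  Clobbering %ebx is problematic under -fPIC
   on i386; treat this as illustration, not a drop-in helper.  */
static int
have_3dnow (void)
{
  unsigned int max_ext, eax, edx;

  __asm__ ("cpuid"
           : "=a" (max_ext)
           : "a" (0x80000000U)
           : "ebx", "ecx", "edx");
  if (max_ext < 0x80000001U)
    return 0;

  __asm__ ("cpuid"
           : "=a" (eax), "=d" (edx)
           : "a" (0x80000001U)
           : "ebx", "ecx");
  (void) eax;
  return (edx >> 31) & 1;   /* bit 30 would report the Athlon (3DNow!+) extensions */
}
```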
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cfedd181d94..aaaaa30da1c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,34 @@ +2001-09-25 Bernd Schmidt <bernds@redhat.com> + + Mostly from Graham Stott <grahams@redhat.com> + * c-common.c (type_for_mode): Add support for V2SFmode. + * tree.c (build_common_tree_nodes_2): Likewise. + * tree.h (enum tree_index, global_trees): Likewise. + * config/i386/i386.c (x86_3dnow_a): New variable. + (override_options): Support 3Dnow extensions. + (bdesc_2arg, bdesc_1arg): Some SSE instructions are also part of + Athlon's version of 3Dnow. + (ix86_init_mmx_sse_builtins): Create 3Dnow builtins. + (ix86_expand_builtin): Handle them. + (ix86_hard_regno_mode_ok): Support V2SFmode if using 3Dnow. + * config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A, TARGET_3DNOW, + TARGET_3DNOW_A): New macros. + (TARGET_SWITCHES): Add 3Dnow switches. + (VALID_MMX_REG_MODE_3DNOW): New macro. + (VECTOR_MODE_SUPPORTED_P): Use it. + (enum ix86_builtins): Add entries for 3Dnow builtins. + * config/i386/i386.md (movv2sf_internal, movv2sf, pushv2sf, pf2id, + pf2iw, addv2sf3, subv2sf3, subrv2sf3, gtv2sf3, gev2sf3, eqv2sf3, + pfmaxv23sf3, pfminv2sf3, mulv2sf3, femms, prefetch_3dnow, prefetchw, + pfacc, pfnacc, pfpnacc, pi2fw, floatv2si2, pavgusb, pfrcpv2sf2, + pfrcpit1v2sf3, pfrcpit2v2sf3, pfrsqrtv2sf2, pfrsqit1v2sf3, + pmulhrwvhi3, pswapdv2si2, pswapdv2sf2): New patterns. + (mmx_pmovmskb, mmx_maskmovq, sse_movntdi, umulv4hi3_highpart, + mmx_uavgv8qi3, mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pextrw, + mmx_pshufw, umaxv8qi3, smaxv4hi3, uminv8qi3, sminv4hi3, sfence, + sfence_insn, prefetch): Make these available if TARGET_SSE or + TARGET_3DNOW_A. + Fri Sep 28 19:18:40 CEST 2001 Jan Hubicka <jh@suse.cz> * i386-protos.h (ix86_setup_incoming_varargs, ix86_va_arg, diff --git a/gcc/c-common.c b/gcc/c-common.c index 74a55525015..190c338d5f9 100644 --- a/gcc/c-common.c +++ b/gcc/c-common.c @@ -1347,6 +1347,8 @@ type_for_mode (mode, unsignedp) return V4HI_type_node; if (mode == TYPE_MODE (V8QI_type_node) && VECTOR_MODE_SUPPORTED_P (mode)) return V8QI_type_node; + if (mode == TYPE_MODE (V2SF_type_node) && VECTOR_MODE_SUPPORTED_P (mode)) + return V2SF_type_node; #endif return 0; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 889c428bc37..f8b65daf5fd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -323,6 +323,7 @@ const int x86_double_with_add = ~m_386; const int x86_use_bit_test = m_386; const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6; const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4; +const int x86_3dnow_a = m_ATHLON; const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4; const int x86_branch_hints = m_PENT4; const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; @@ -988,6 +989,15 @@ override_options () if (TARGET_SSE) target_flags |= MASK_MMX; + /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */ + if (TARGET_3DNOW) + { + target_flags |= MASK_MMX; + /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX + extensions it adds. 
*/ + if (x86_3dnow_a & (1 << ix86_arch)) + target_flags |= MASK_3DNOW_A; + } if ((x86_accumulate_outgoing_args & CPUMASK) && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS) && !optimize_size) @@ -10731,15 +10741,15 @@ static struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, - { MASK_SSE, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, - { MASK_SSE, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, - { MASK_SSE, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, @@ -10748,10 +10758,10 @@ static struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, - { MASK_SSE, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, - { MASK_SSE, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, - { MASK_SSE, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, - { MASK_SSE, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, @@ -10794,7 +10804,7 @@ static struct builtin_description bdesc_2arg[] = static struct builtin_description bdesc_1arg[] = { - { MASK_SSE, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, @@ -11034,6 +11044,40 @@ ix86_init_mmx_sse_builtins () long_long_unsigned_type_node, endlink))); + tree v2si_ftype_v2sf + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V2SF_type_node, + endlink)); + tree v2sf_ftype_v2si + = build_function_type (V2SF_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + endlink)); + tree v2si_ftype_v2si + = build_function_type 
(V2SI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + endlink)); + tree v2sf_ftype_v2sf + = build_function_type (V2SF_type_node, + tree_cons (NULL_TREE, V2SF_type_node, + endlink)); + tree v2sf_ftype_v2sf_v2sf + = build_function_type (V2SF_type_node, + tree_cons (NULL_TREE, V2SF_type_node, + tree_cons (NULL_TREE, + V2SF_type_node, + endlink))); + tree v2si_ftype_v2sf_v2sf + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V2SF_type_node, + tree_cons (NULL_TREE, + V2SF_type_node, + endlink))); + + tree void_ftype_pchar + = build_function_type (void_type_node, + tree_cons (NULL_TREE, pchar_type_node, + endlink)); + /* Add all builtins that are more or less simple operations on two operands. */ for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) @@ -11047,9 +11091,6 @@ ix86_init_mmx_sse_builtins () continue; mode = insn_data[d->icode].operand[1].mode; - if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode)) - continue; - switch (mode) { case V4SFmode: @@ -11121,10 +11162,10 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); - def_builtin (MASK_SSE, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); - def_builtin (MASK_SSE, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); - def_builtin (MASK_SSE, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS); def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS); @@ -11139,14 +11180,14 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE, "__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); - def_builtin (MASK_SSE, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); - def_builtin (MASK_SSE, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); - def_builtin (MASK_SSE, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); - def_builtin (MASK_SSE, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH); - def_builtin (MASK_SSE, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, 
IX86_BUILTIN_RCPSS); @@ -11157,6 +11198,38 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); + /* Original 3DNow! */ + def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); + def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); + def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); + def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); + def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); + def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW); + def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW); + + /* 3DNow! extension as used in the Athlon CPU. */ + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); + /* Composite intrinsics. */ def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1); def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS); @@ -11179,7 +11252,7 @@ safe_vector_operand (x, mode) return x; x = gen_reg_rtx (mode); - if (VALID_MMX_REG_MODE (mode)) + if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode)) emit_insn (gen_mmx_clrdi (mode == DImode ? 
x : gen_rtx_SUBREG (DImode, x, 0))); else @@ -11739,6 +11812,107 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) emit_insn (pat); return target; + case IX86_BUILTIN_FEMMS: + emit_insn (gen_femms ()); + return NULL_RTX; + + case IX86_BUILTIN_PAVGUSB: + return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target); + + case IX86_BUILTIN_PF2ID: + return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0); + + case IX86_BUILTIN_PFACC: + return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target); + + case IX86_BUILTIN_PFADD: + return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target); + + case IX86_BUILTIN_PFCMPEQ: + return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target); + + case IX86_BUILTIN_PFCMPGE: + return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target); + + case IX86_BUILTIN_PFCMPGT: + return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target); + + case IX86_BUILTIN_PFMAX: + return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target); + + case IX86_BUILTIN_PFMIN: + return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target); + + case IX86_BUILTIN_PFMUL: + return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target); + + case IX86_BUILTIN_PFRCP: + return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0); + + case IX86_BUILTIN_PFRCPIT1: + return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target); + + case IX86_BUILTIN_PFRCPIT2: + return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target); + + case IX86_BUILTIN_PFRSQIT1: + return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target); + + case IX86_BUILTIN_PFRSQRT: + return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0); + + case IX86_BUILTIN_PFSUB: + return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target); + + case IX86_BUILTIN_PFSUBR: + return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target); + + case IX86_BUILTIN_PI2FD: + return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0); + + case IX86_BUILTIN_PMULHRW: + return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target); + + case IX86_BUILTIN_PREFETCH_3DNOW: + icode = CODE_FOR_prefetch_3dnow; + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + mode0 = insn_data[icode].operand[0].mode; + pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0)); + if (! pat) + return NULL_RTX; + emit_insn (pat); + return NULL_RTX; + + case IX86_BUILTIN_PREFETCHW: + icode = CODE_FOR_prefetchw; + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + mode0 = insn_data[icode].operand[0].mode; + pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0)); + if (! pat) + return NULL_RTX; + emit_insn (pat); + return NULL_RTX; + + case IX86_BUILTIN_PF2IW: + return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0); + + case IX86_BUILTIN_PFNACC: + return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target); + + case IX86_BUILTIN_PFPNACC: + return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target); + + case IX86_BUILTIN_PI2FW: + return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0); + + case IX86_BUILTIN_PSWAPDSI: + return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0); + + case IX86_BUILTIN_PSWAPDSF: + return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0); + /* Composite intrinsics. 
*/ case IX86_BUILTIN_SETPS1: target = assign_386_stack_local (SFmode, 0); @@ -12055,7 +12229,7 @@ ix86_hard_regno_mode_ok (regno, mode) if (SSE_REGNO_P (regno)) return VALID_SSE_REG_MODE (mode); if (MMX_REGNO_P (regno)) - return VALID_MMX_REG_MODE (mode); + return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode); /* We handle both integer and floats in the general purpose registers. In future we should be able to handle vector modes as well. */ if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode)) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 654d8fbdac0..fc6f1c93861 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -118,10 +118,12 @@ extern int target_flags; #define MASK_MMX 0x00020000 /* Support MMX regs/builtins */ #define MASK_SSE 0x00040000 /* Support SSE regs/builtins */ #define MASK_SSE2 0x00080000 /* Support SSE2 regs/builtins */ -#define MASK_128BIT_LONG_DOUBLE 0x00100000 /* long double size is 128bit */ -#define MASK_MIX_SSE_I387 0x00200000 /* Mix SSE and i387 instructions */ -#define MASK_64BIT 0x00400000 /* Produce 64bit code */ -#define MASK_NO_RED_ZONE 0x00800000 /* Do not use red zone */ +#define MASK_3DNOW 0x00100000 /* Support 3Dnow builtins */ +#define MASK_3DNOW_A 0x00200000 /* Support Athlon 3Dnow builtins */ +#define MASK_128BIT_LONG_DOUBLE 0x00400000 /* long double size is 128bit */ +#define MASK_MIX_SSE_I387 0x00800000 /* Mix SSE and i387 instructions */ +#define MASK_64BIT 0x01000000 /* Produce 64bit code */ +#define MASK_NO_RED_ZONE 0x02000000 /* Do not use red zone */ /* Temporary codegen switches */ #define MASK_INTEL_SYNTAX 0x00000200 @@ -264,6 +266,8 @@ extern const int x86_epilogue_using_move; #define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0) #define TARGET_MIX_SSE_I387 ((target_flags & MASK_MIX_SSE_I387) != 0) #define TARGET_MMX ((target_flags & MASK_MMX) != 0) +#define TARGET_3DNOW ((target_flags & MASK_3DNOW) != 0) +#define TARGET_3DNOW_A ((target_flags & MASK_3DNOW_A) != 0) #define TARGET_RED_ZONE (!(target_flags & MASK_NO_RED_ZONE)) @@ -335,6 +339,10 @@ extern const int x86_epilogue_using_move; { "mmx", MASK_MMX, N_("Support MMX builtins") }, \ { "no-mmx", -MASK_MMX, \ N_("Do not support MMX builtins") }, \ + { "3dnow", MASK_3DNOW, \ + N_("Support 3DNow! builtins") }, \ + { "no-3dnow", -MASK_3DNOW, \ + N_("Do not support 3DNow! builtins") }, \ { "sse", MASK_SSE, \ N_("Support MMX and SSE builtins and code generation") }, \ { "no-sse", -MASK_SSE, \ @@ -918,13 +926,17 @@ extern int ix86_arch; || (MODE) == SFmode \ || (TARGET_SSE2 && ((MODE) == DFmode || VALID_MMX_REG_MODE (MODE)))) +#define VALID_MMX_REG_MODE_3DNOW(MODE) \ + ((MODE) == V2SFmode || (MODE) == SFmode) + #define VALID_MMX_REG_MODE(MODE) \ ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \ || (MODE) == V2SImode || (MODE) == SImode) #define VECTOR_MODE_SUPPORTED_P(MODE) \ (VALID_SSE_REG_MODE (MODE) && TARGET_SSE ? 1 \ - : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1 : 0) + : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1 \ + : VALID_MMX_REG_MODE_3DNOW (MODE) && TARGET_3DNOW ? 1 : 0) #define VALID_FP_MODE_P(mode) \ ((mode) == SFmode || (mode) == DFmode || (mode) == TFmode \ @@ -2204,6 +2216,38 @@ enum ix86_builtins IX86_BUILTIN_SFENCE, IX86_BUILTIN_PREFETCH, + /* 3DNow! 
Original */ + IX86_BUILTIN_FEMMS, + IX86_BUILTIN_PAVGUSB, + IX86_BUILTIN_PF2ID, + IX86_BUILTIN_PFACC, + IX86_BUILTIN_PFADD, + IX86_BUILTIN_PFCMPEQ, + IX86_BUILTIN_PFCMPGE, + IX86_BUILTIN_PFCMPGT, + IX86_BUILTIN_PFMAX, + IX86_BUILTIN_PFMIN, + IX86_BUILTIN_PFMUL, + IX86_BUILTIN_PFRCP, + IX86_BUILTIN_PFRCPIT1, + IX86_BUILTIN_PFRCPIT2, + IX86_BUILTIN_PFRSQIT1, + IX86_BUILTIN_PFRSQRT, + IX86_BUILTIN_PFSUB, + IX86_BUILTIN_PFSUBR, + IX86_BUILTIN_PI2FD, + IX86_BUILTIN_PMULHRW, + IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */ + IX86_BUILTIN_PREFETCHW, + + /* 3DNow! Athlon Extensions */ + IX86_BUILTIN_PF2IW, + IX86_BUILTIN_PFNACC, + IX86_BUILTIN_PFPNACC, + IX86_BUILTIN_PI2FW, + IX86_BUILTIN_PSWAPDSI, + IX86_BUILTIN_PSWAPDSF, + /* Composite builtins, expand to more than one insn. */ IX86_BUILTIN_SETPS1, IX86_BUILTIN_SETPS, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 5119db6615b..3b98788b67a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -92,6 +92,15 @@ ;; 43 This is a `rsqsrt' operation. ;; 44 This is a `sfence' operation. ;; 45 This is a noop to prevent excessive combiner cleverness. +;; 46 This is a `femms' operation. +;; 47 This is a `prefetch' (3DNow) operation. +;; 48 This is a `prefetchw' operation. +;; 49 This is a 'pavgusb' operation. +;; 50 This is a `pfrcp' operation. +;; 51 This is a `pfrcpit1' operation. +;; 52 This is a `pfrcpit2' operation. +;; 53 This is a `pfrsqrt' operation. +;; 54 This is a `pfrsqrit1' operation. ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; from i386.c. @@ -17455,6 +17464,13 @@ "movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmx")]) +(define_insn "movv2sf_internal" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") + (match_operand:V2SF 1 "general_operand" "ym,y"))] + "TARGET_3DNOW" + "movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + (define_expand "movti" [(set (match_operand:TI 0 "general_operand" "") (match_operand:TI 1 "general_operand" ""))] @@ -17641,6 +17657,40 @@ } }) +(define_expand "movv2sf" + [(set (match_operand:V2SF 0 "general_operand" "") + (match_operand:V2SF 1 "general_operand" ""))] + "TARGET_3DNOW" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V2SFmode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, + XEXP (force_const_mem (V2SFmode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V2SFmode, addr); + } + + /* Make operand1 a register is it isn't already. 
*/ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V2SFmode) + && !register_operand (operands[1], V2SFmode) + && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0) + && operands[1] != CONST0_RTX (V2SFmode)) + { + rtx temp = force_reg (V2SFmode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + (define_insn_and_split "*pushti" [(set (match_operand:TI 0 "push_operand" "=<") (match_operand:TI 1 "nonmemory_operand" "x"))] @@ -17707,6 +17757,17 @@ "" [(set_attr "type" "mmx")]) +(define_insn_and_split "*pushv2sf" + [(set (match_operand:V2SF 0 "push_operand" "=<") + (match_operand:V2SF 1 "nonmemory_operand" "y"))] + "TARGET_3DNOW" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (mem:V2SF (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "mmx")]) + (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m") (match_operand:TI 1 "general_operand" "xm,x"))] @@ -17749,7 +17810,7 @@ (define_insn "mmx_pmovmskb" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pmovmskb\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) @@ -17757,7 +17818,7 @@ [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") (match_operand:V8QI 2 "register_operand" "y")] 32))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" ;; @@@ check ordering of operands in intel/nonintel syntax "maskmovq\t{%2, %1|%1, %2}" [(set_attr "type" "sse")]) @@ -17772,7 +17833,7 @@ (define_insn "sse_movntdi" [(set (match_operand:DI 0 "memory_operand" "=m") (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "movntq\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) @@ -18535,7 +18596,7 @@ (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0")) (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) (const_int 16))))] - "TARGET_MMX" + "TARGET_SSE || TARGET_3DNOW_A" "pmulhuw\t{%2, %0|%0, %2}" [(set_attr "type" "mmx")]) @@ -18628,7 +18689,7 @@ (const_int 1) (const_int 1)]))) (const_int 1)))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pavgb\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18643,7 +18704,7 @@ (const_int 1) (const_int 1)]))) (const_int 1)))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pavgw\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18651,7 +18712,7 @@ [(set (match_operand:V8QI 0 "register_operand" "=y") (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "psadbw\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18664,7 +18725,7 @@ (vec_duplicate:V4HI (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm"))) (match_operand:SI 3 "immediate_operand" "i")))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pinsrw\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sse")]) @@ -18673,7 +18734,7 @@ (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pextrw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sse")]) @@ -18682,7 +18743,7 @@ (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0") (match_operand:V4HI 2 "nonimmediate_operand" "ym") (match_operand:SI 3 "immediate_operand" "i")] 41))] - 
"TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pshufw\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sse")]) @@ -18744,7 +18805,7 @@ [(set (match_operand:V8QI 0 "register_operand" "=y") (umax:V8QI (match_operand:V8QI 1 "register_operand" "0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pmaxub\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18752,7 +18813,7 @@ [(set (match_operand:V4HI 0 "register_operand" "=y") (smax:V4HI (match_operand:V4HI 1 "register_operand" "0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pmaxsw\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18760,7 +18821,7 @@ [(set (match_operand:V8QI 0 "register_operand" "=y") (umin:V8QI (match_operand:V8QI 1 "register_operand" "0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pminub\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18768,7 +18829,7 @@ [(set (match_operand:V4HI 0 "register_operand" "=y") (smin:V4HI (match_operand:V4HI 1 "register_operand" "0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "pminsw\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -19031,7 +19092,7 @@ (define_expand "sfence" [(set (match_dup 0) (unspec:BLK [(match_dup 0)] 44))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" { operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); MEM_VOLATILE_P (operands[0]) = 1; @@ -19040,7 +19101,7 @@ (define_insn "*sfence_insn" [(set (match_operand:BLK 0 "" "") (unspec:BLK [(match_dup 0)] 44))] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" "sfence" [(set_attr "type" "sse") (set_attr "memory" "unknown")]) @@ -19048,7 +19109,7 @@ (define_insn "prefetch" [(unspec [(match_operand:SI 0 "address_operand" "p") (match_operand:SI 1 "immediate_operand" "n")] 35)] - "TARGET_SSE" + "TARGET_SSE || TARGET_3DNOW_A" { switch (INTVAL (operands[1])) { @@ -19126,3 +19187,289 @@ (set_attr "memory" "store") (set_attr "modrm" "0") (set_attr "mode" "DI")]) + +;; 3Dnow! 
instructions + +(define_insn "addv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (plus:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfadd\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "subv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (minus:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfsub\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "subrv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym") + (match_operand:V2SF 1 "register_operand" "0")))] + "TARGET_3DNOW" + "pfsubr\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "gtv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpgt\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "gev2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpge\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "eqv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (eq:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpeq\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pfmaxv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (smax:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfmax\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pfminv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (smin:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfmin\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mulv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (mult:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfmul\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "femms" + [(unspec_volatile [(const_int 0)] 46) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber (reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] + "TARGET_3DNOW" + "femms" + [(set_attr "type" "mmx")]) + +(define_insn "prefetch_3dnow" + [(unspec [(match_operand:SI 0 "address_operand" "p")] 47)] + "TARGET_3DNOW" + "prefetch\\t%a0" + [(set_attr "type" "mmx")]) + +(define_insn "prefetchw" + [(unspec [(match_operand:SI 0 "address_operand" "p")] 48)] + "TARGET_3DNOW" + "prefetchw\\t%a0" + [(set_attr "type" "mmx")]) + +(define_insn "pf2id" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pf2id\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "pf2iw" + [(set (match_operand:V2SI 0 
"register_operand" "=y") + (sign_extend:V2SI + (ss_truncate:V2HI + (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] + "TARGET_3DNOW_A" + "pf2iw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "pfacc" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (plus:SF + (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])))))] + "TARGET_3DNOW" + "pfacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pfnacc" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (minus:SF + (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)]))) + (minus:SF + (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])))))] + "TARGET_3DNOW_A" + "pfnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pfpnacc" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (minus:SF + (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])))))] + "TARGET_3DNOW_A" + "pfpnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pi2fw" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF + (vec_concat:V2SI + (sign_extend:SI + (truncate:HI + (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])))) + (sign_extend:SI + (truncate:HI + (vec_select:SI (match_dup 1) + (parallel [(const_int 1)])))))))] + "TARGET_3DNOW_A" + "pi2fw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "floatv2si2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pi2fd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +;; This insn is identical to pavgb in operation, but the opcode is +;; different. To avoid accidentally matching pavgb, use an unspec. 
+ +(define_insn "pavgusb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI + [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))] + "TARGET_3DNOW" + "pavgusb\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +;; 3DNow reciprical and sqrt + +(define_insn "pfrcpv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))] + "TARGET_3DNOW" + "pfrcp\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "pfrcpit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))] + "TARGET_3DNOW" + "pfrcpit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pfrcpit2v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))] + "TARGET_3DNOW" + "pfrcpit2\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pfrsqrtv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))] + "TARGET_3DNOW" + "pfrsqrt\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "pfrsqit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))] + "TARGET_3DNOW" + "pfrsqit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pmulhrwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (vec_const:V4SI + (parallel [(const_int 0x8000) + (const_int 0x8000) + (const_int 0x8000) + (const_int 0x8000)]))) + (const_int 16))))] + "TARGET_3DNOW" + "pmulhrw\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "pswapdv2si2" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "pswapdv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) diff --git a/gcc/tree.c b/gcc/tree.c index 8a8d22d9760..62b07600497 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -4877,4 +4877,9 @@ build_common_tree_nodes_2 (short_double) TREE_TYPE (V8QI_type_node) = intQI_type_node; TYPE_MODE (V8QI_type_node) = V8QImode; finish_vector_type (V8QI_type_node); + + V2SF_type_node = make_node (VECTOR_TYPE); + TREE_TYPE (V2SF_type_node) = float_type_node; + TYPE_MODE (V2SF_type_node) = V2SFmode; + finish_vector_type (V2SF_type_node); } diff --git a/gcc/tree.h b/gcc/tree.h index b95a4aaca40..d4306a88eda 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1846,6 +1846,7 @@ enum tree_index TI_V8QI_TYPE, TI_V4HI_TYPE, TI_V2SI_TYPE, + TI_V2SF_TYPE, TI_MAIN_IDENTIFIER, @@ -1911,6 +1912,7 @@ extern tree global_trees[TI_MAX]; #define V8QI_type_node global_trees[TI_V8QI_TYPE] #define V4HI_type_node global_trees[TI_V4HI_TYPE] 
#define V2SI_type_node global_trees[TI_V2SI_TYPE] +#define V2SF_type_node global_trees[TI_V2SF_TYPE] /* An enumeration of the standard C integer types. These must be ordered so that shorter types appear before longer ones. */ |
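A note on the reciprocal patterns above: PFRCP only delivers an approximation (about 14 significant bits, computed from the low element and broadcast to both lanes), and PFRCPIT1/PFRCPIT2 exist to refine that seed to full single precision. Here is a hedged sketch of the division idiom those three builtins are designed for, following the sequence in AMD's 3DNow! documentation; it is only exact when both lanes of the divisor hold the same value, since PFRCP reads the low lane.

```c
/* Illustrative only; the typedef name is an assumption, and the operand
   order of the PFRCPIT steps follows AMD's published refinement sequence.  */
typedef float __v2sf __attribute__ ((mode (V2SF)));

static __v2sf
divide_by (__v2sf a, __v2sf b)
{
  __v2sf x0 = __builtin_ia32_pfrcp (b);          /* seed: ~14-bit approximation of 1/b */
  __v2sf x1 = __builtin_ia32_pfrcpit1 (b, x0);   /* first refinement step              */
  __v2sf x2 = __builtin_ia32_pfrcpit2 (x1, x0);  /* second step: full single precision */
  return __builtin_ia32_pfmul (a, x2);           /* a * (1/b)                          */
}
```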