diff options
author | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-02-26 13:09:58 +0000 |
---|---|---|
committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-02-26 13:09:58 +0000 |
commit | b8053af55de78a3f080783e5113fd6452e5a43c5 (patch) | |
tree | a5906142e844e296abb7382e34657faf4e58f74f /gcc/config | |
parent | 4896274c9597b09d4c61bdd2efb3201a72634b3c (diff) | |
download | gcc-b8053af55de78a3f080783e5113fd6452e5a43c5.tar.gz |
2008-02-26 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk r132671
Merged revisions 132452-132671 via svnmerge from
svn+ssh://bstarynk@gcc.gnu.org/svn/gcc/trunk
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@132672 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/avr/libgcc.S | 50 | ||||
-rw-r--r-- | gcc/config/cris/cris.h | 1 | ||||
-rw-r--r-- | gcc/config/darwin-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/darwin.c | 48 | ||||
-rw-r--r-- | gcc/config/h8300/h8300.c | 2 | ||||
-rw-r--r-- | gcc/config/h8300/h8300.md | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 176 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 16 | ||||
-rw-r--r-- | gcc/config/i386/netware-libgcc.exp | 9 | ||||
-rw-r--r-- | gcc/config/i386/netware.c | 191 | ||||
-rw-r--r-- | gcc/config/i386/netware.h | 24 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 2722 | ||||
-rw-r--r-- | gcc/config/mips/mips.md | 12 | ||||
-rw-r--r-- | gcc/config/rs6000/darwin.h | 3 | ||||
-rw-r--r-- | gcc/config/rs6000/eabispe.h | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/linuxspe.h | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 105 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.h | 27 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 3 | ||||
-rw-r--r-- | gcc/config/sh/sh.c | 13 | ||||
-rw-r--r-- | gcc/config/sh/sh.md | 2 | ||||
-rw-r--r-- | gcc/config/spu/spu.md | 176 |
24 files changed, 1736 insertions, 1857 deletions
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S index 397778b82d9..8fdba55f775 100644 --- a/gcc/config/avr/libgcc.S +++ b/gcc/config/avr/libgcc.S @@ -32,6 +32,7 @@ Boston, MA 02110-1301, USA. */ #define __SREG__ 0x3f #define __SP_H__ 0x3e #define __SP_L__ 0x3d +#define __RAMPZ__ 0x3B /* Most of the functions here are called directly from avr.md patterns, instead of using the standard libcall mechanisms. @@ -686,20 +687,54 @@ __tablejump__: .endfunc #endif /* defined (L_tablejump) */ -/* __do_copy_data is only necessary if there is anything in .data section. - Does not use RAMPZ - crt*.o provides a replacement for >64K devices. */ - #ifdef L_copy_data .section .init4,"ax",@progbits .global __do_copy_data __do_copy_data: +#if defined(__AVR_HAVE_ELPMX__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start) + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm r0, Z+ + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start - 0x10000) +.L__do_copy_data_carry: + inc r16 + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm + st X+, r0 + adiw r30, 1 + brcs .L__do_copy_data_carry +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) ldi r17, hi8(__data_end) ldi r26, lo8(__data_start) ldi r27, hi8(__data_start) ldi r30, lo8(__data_load_start) ldi r31, hi8(__data_load_start) - rjmp .do_copy_data_start -.do_copy_data_loop: + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: #if defined (__AVR_HAVE_LPMX__) lpm r0, Z+ #else @@ -707,10 +742,11 @@ __do_copy_data: adiw r30, 1 #endif st X+, r0 -.do_copy_data_start: +.L__do_copy_data_start: cpi r26, lo8(__data_end) cpc r27, r17 - brne .do_copy_data_loop + brne .L__do_copy_data_loop +#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ #endif /* L_copy_data */ /* __do_clear_bss is only necessary if there is anything in .bss section. */ diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index 9e59b88216b..b8f4ae56713 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -600,6 +600,7 @@ enum reg_class #define REG_CLASS_FROM_LETTER(C) \ ( \ (C) == 'a' ? ACR_REGS : \ + (C) == 'b' ? GENNONACR_REGS : \ (C) == 'h' ? MOF_REGS : \ (C) == 'x' ? SPECIAL_REGS : \ (C) == 'c' ? CC0_REGS : \ diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index a8ce17c0111..c894bf05104 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -89,3 +89,4 @@ extern void darwin_cpp_builtins (struct cpp_reader *); extern void darwin_asm_output_anchor (rtx symbol); extern bool darwin_kextabi_p (void); extern void darwin_override_options (void); +extern void darwin_patch_builtins (void); diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index 307698d095f..13aa021f4c6 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -1735,4 +1735,52 @@ darwin_override_options (void) flag_var_tracking_uninit = 1; } +/* Add $LDBL128 suffix to long double builtins. */ + +static void +darwin_patch_builtin (int fncode) +{ + tree fn = built_in_decls[fncode]; + tree sym; + char *newname; + + if (!fn) + return; + + sym = DECL_ASSEMBLER_NAME (fn); + newname = alloca (IDENTIFIER_LENGTH (sym) + 10); + strcpy (newname, "_"); + strcat (newname, IDENTIFIER_POINTER (sym)); + strcat (newname, "$LDBL128"); + set_user_assembler_name (fn, newname); + /*sym = get_identifier (newname); + SET_DECL_ASSEMBLER_NAME (fn, sym);*/ + + fn = implicit_built_in_decls[fncode]; + if (fn) + set_user_assembler_name (fn, newname); + /*SET_DECL_ASSEMBLER_NAME (fn, sym);*/ +} + +void +darwin_patch_builtins (void) +{ + if (LONG_DOUBLE_TYPE_SIZE != 128) + return; + +#define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_NO64(fncode) \ + if (!TARGET_64BIT) \ + darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_VARIADIC(fncode) \ + if (!TARGET_64BIT \ + && (strverscmp (darwin_macosx_version_min, "10.3.9") >= 0)) \ + darwin_patch_builtin (fncode); +#include "darwin-ppc-ldouble-patch.def" +#undef PATCH_BUILTIN +#undef PATCH_BUILTIN_NO64 +#undef PATCH_BUILTIN_VARIADIC +} + + #include "gt-darwin.h" diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c index 96b6311ce7c..f90bd414735 100644 --- a/gcc/config/h8300/h8300.c +++ b/gcc/config/h8300/h8300.c @@ -930,7 +930,7 @@ h8300_expand_epilogue (void) } if (!returned_p) - emit_insn (gen_rtx_RETURN (VOIDmode)); + emit_jump_insn (gen_rtx_RETURN (VOIDmode)); } /* Return nonzero if the current function is an interrupt diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md index 08a8d2e9313..9b6c0aa4e16 100644 --- a/gcc/config/h8300/h8300.md +++ b/gcc/config/h8300/h8300.md @@ -3282,6 +3282,9 @@ if (GET_CODE (operands[0]) == MEM || GET_CODE (operands[3]) == MEM) FAIL; + + if (GET_CODE (operands[3]) != REG) + operands[3] = force_reg (HImode, operands[3]); }") (define_insn "" diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 5dad2fcf515..5a4456d912b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12148,6 +12148,7 @@ ix86_expand_branch (enum rtx_code code, rtx label) ix86_expand_branch (code, label); return; } + break; case LE: case LEU: case GT: case GTU: if (lo[1] == constm1_rtx) { @@ -12156,6 +12157,7 @@ ix86_expand_branch (enum rtx_code code, rtx label) ix86_expand_branch (code, label); return; } + break; default: break; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index fedac5643a4..f2429846691 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1454,6 +1454,9 @@ enum reg_class #define SSE_FLOAT_MODE_P(MODE) \ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) +#define SSE_VEC_FLOAT_MODE_P(MODE) \ + ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode)) + #define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP))) #define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3c9c2cc8c86..92a37280f5f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -13964,25 +13964,15 @@ ;; 0xffffffff is NaN, but not in normalized form, so we can't represent ;; it directly. -(define_insn "*sse_setccsf" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 1 "sse_comparison_operator" - [(match_operand:SF 2 "register_operand" "0") - (match_operand:SF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE && !TARGET_SSE5" - "cmp%D1ss\t{%3, %0|%0, %3}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) - -(define_insn "*sse_setccdf" - [(set (match_operand:DF 0 "register_operand" "=x") - (match_operator:DF 1 "sse_comparison_operator" - [(match_operand:DF 2 "register_operand" "0") - (match_operand:DF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE2 && !TARGET_SSE5" - "cmp%D1sd\t{%3, %0|%0, %3}" +(define_insn "*sse_setcc<mode>" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "sse_comparison_operator" + [(match_operand:MODEF 2 "register_operand" "0") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5" + "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}" [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) + (set_attr "mode" "<MODE>")]) (define_insn "*sse5_setcc<mode>" [(set (match_operand:MODEF 0 "register_operand" "=x") @@ -19383,7 +19373,7 @@ (match_operand:DI 2 "general_operand" "") (match_operand:DI 3 "general_operand" "")))] "TARGET_64BIT" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "x86_movdicc_0_m1_rex64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -19437,7 +19427,7 @@ (match_operand:SI 2 "general_operand" "") (match_operand:SI 3 "general_operand" "")))] "" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing ;; the register first winds up with `sbbl $0,reg', which is also weird. @@ -19495,7 +19485,7 @@ (match_operand:HI 2 "general_operand" "") (match_operand:HI 3 "general_operand" "")))] "TARGET_HIMODE_MATH" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "*movhicc_noc" [(set (match_operand:HI 0 "register_operand" "=r,r") @@ -19517,7 +19507,7 @@ (match_operand:QI 2 "general_operand" "") (match_operand:QI 3 "general_operand" "")))] "TARGET_QIMODE_MATH" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn_and_split "*movqicc_noc" [(set (match_operand:QI 0 "register_operand" "=r,r") @@ -19539,13 +19529,15 @@ [(set_attr "type" "icmov") (set_attr "mode" "SI")]) -(define_expand "movsfcc" - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operand 1 "comparison_operator" "") - (match_operand:SF 2 "register_operand" "") - (match_operand:SF 3 "register_operand" "")))] - "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") +(define_expand "mov<mode>cc" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (if_then_else:X87MODEF + (match_operand 1 "comparison_operator" "") + (match_operand:X87MODEF 2 "register_operand" "") + (match_operand:X87MODEF 3 "register_operand" "")))] + "(TARGET_80387 && TARGET_CMOVE) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" + "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") (define_insn "*movsfcc_1_387" [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") @@ -19563,14 +19555,6 @@ [(set_attr "type" "fcmov,fcmov,icmov,icmov") (set_attr "mode" "SF,SF,SI,SI")]) -(define_expand "movdfcc" - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operand 1 "comparison_operator" "") - (match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" "")))] - "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - (define_insn "*movdfcc_1" [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" @@ -19623,14 +19607,6 @@ split_di (operands+3, 1, operands+7, operands+8); split_di (operands, 1, operands+2, operands+3);") -(define_expand "movxfcc" - [(set (match_operand:XF 0 "register_operand" "") - (if_then_else:XF (match_operand 1 "comparison_operator" "") - (match_operand:XF 2 "register_operand" "") - (match_operand:XF 3 "register_operand" "")))] - "TARGET_80387 && TARGET_CMOVE" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - (define_insn "*movxfcc_1" [(set (match_operand:XF 0 "register_operand" "=f,f") (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" @@ -19663,41 +19639,25 @@ ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator ;; are undefined in this condition, we're certain this is correct. -(define_insn "sminsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE_MATH" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "smaxsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE_MATH" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "smindf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "minsd\t{%2, %0|%0, %2}" +(define_insn "smin<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + "mins<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "smaxdf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "maxsd\t{%2, %0|%0, %2}" +(define_insn "smax<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smax:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "<MODE>")]) ;; These versions of the min/max patterns implement exactly the operations ;; min = (op1 < op2 ? op1 : op2) @@ -19705,45 +19665,27 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "*ieee_sminsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE_MATH" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "*ieee_smaxsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE_MATH" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "*ieee_smindf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "minsd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smin<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + "mins<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "*ieee_smaxdf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "maxsd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smax<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "<MODE>")]) ;; Make two stack loads independent: ;; fld aa fld aa @@ -19778,7 +19720,7 @@ (match_operand:QI 2 "register_operand" "") (match_operand:QI 3 "const_int_operand" "")] "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") (define_expand "addhicc" [(match_operand:HI 0 "register_operand" "") @@ -19786,7 +19728,7 @@ (match_operand:HI 2 "register_operand" "") (match_operand:HI 3 "const_int_operand" "")] "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") (define_expand "addsicc" [(match_operand:SI 0 "register_operand" "") @@ -19794,7 +19736,7 @@ (match_operand:SI 2 "register_operand" "") (match_operand:SI 3 "const_int_operand" "")] "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") (define_expand "adddicc" [(match_operand:DI 0 "register_operand" "") @@ -19802,7 +19744,7 @@ (match_operand:DI 2 "register_operand" "") (match_operand:DI 3 "const_int_operand" "")] "TARGET_64BIT" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") ;; Misc patterns (?) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index ee819936f6e..3371161f82f 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -63,9 +63,9 @@ (define_insn "*mov<mode>_internal_rex64" [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" - "=rm,r,*y,*y ,m ,*y,Y2,x,x ,m,r,x") + "=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x") (match_operand:MMXMODEI 1 "vector_move_operand" - "Cr ,m,C ,*ym,*y,Y2,*y,C,xm,x,x,r"))] + "Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))] "TARGET_64BIT && TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -87,9 +87,9 @@ (define_insn "*mov<mode>_internal" [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" - "=*y,*y ,m ,*y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m") + "=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m") (match_operand:MMXMODEI 1 "vector_move_operand" - "C ,*ym,*y,*Y2,*y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))] + "C ,!ym,!y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -122,9 +122,9 @@ (define_insn "*movv2sf_internal_rex64" [(set (match_operand:V2SF 0 "nonimmediate_operand" - "=rm,r,*y ,*y ,m ,*y,Y2,x,x,x,m,r,x") + "=rm,r,!y ,!y ,m ,!y,Y2,x,x,x,m,r,x") (match_operand:V2SF 1 "vector_move_operand" - "Cr ,m ,C ,*ym,*y,Y2,*y,C,x,m,x,x,r"))] + "Cr ,m ,C ,!ym,!y,Y2,!y,C,x,m,x,x,r"))] "TARGET_64BIT && TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -147,9 +147,9 @@ (define_insn "*movv2sf_internal" [(set (match_operand:V2SF 0 "nonimmediate_operand" - "=*y,*y ,m,*y ,*Y2,*x,*x,*x,m ,?r ,?m") + "=!y,!y ,m,!y ,*Y2,*x,*x,*x,m ,?r ,?m") (match_operand:V2SF 1 "vector_move_operand" - "C ,*ym,*y,*Y2,*y ,C ,*x,m ,*x,irm,r"))] + "C ,!ym,!y,*Y2,!y ,C ,*x,m ,*x,irm,r"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ diff --git a/gcc/config/i386/netware-libgcc.exp b/gcc/config/i386/netware-libgcc.exp index a3498c0e720..309cf754943 100644 --- a/gcc/config/i386/netware-libgcc.exp +++ b/gcc/config/i386/netware-libgcc.exp @@ -6,6 +6,8 @@ __addvsi3, # __ashldi3, # __ashrdi3, + __bswapdi2, + __bswapsi2, __clzdi2, __clzsi2, __ctzdi2, @@ -18,12 +20,18 @@ __divsc3, # __divtc3, __divxc3, + __emutls_get_address, + __emutls_register_common, __ffsdi2, __ffssi2, __fixunsdfdi, __fixunssfdi, # __fixunstfdi, __fixunsxfdi, + __floatundisf, + __floatundidf, +# __floatunditf, + __floatundixf, __gcc_bcmp, __gcc_personality_v0, # __lshrdi3, @@ -64,6 +72,7 @@ _Unwind_GetDataRelBase, _Unwind_GetGR, _Unwind_GetIP, + _Unwind_GetIPInfo, _Unwind_GetLanguageSpecificData, _Unwind_GetRegionStart, _Unwind_GetTextRelBase, diff --git a/gcc/config/i386/netware.c b/gcc/config/i386/netware.c index 0357baff169..63c26cc7551 100644 --- a/gcc/config/i386/netware.c +++ b/gcc/config/i386/netware.c @@ -1,6 +1,6 @@ /* Subroutines for insn-output.c for NetWare. Contributed by Jan Beulich (jbeulich@novell.com) - Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. This file is part of GCC. @@ -32,28 +32,25 @@ along with GCC; see the file COPYING3. If not see #include "toplev.h" #include "ggc.h" - -/* Return string which is the former assembler name modified with an - underscore prefix and a suffix consisting of an atsign (@) followed - by the number of bytes of arguments */ +/* Return string which is the function name, identified by ID, modified + with PREFIX and a suffix consisting of an atsign (@) followed by the + number of bytes of arguments. If ID is NULL use the DECL_NAME as base. + Return NULL if no change required. */ static tree -gen_stdcall_or_fastcall_decoration (tree decl, char prefix) +gen_stdcall_or_fastcall_decoration (tree decl, tree id, char prefix) { - unsigned total = 0; - /* ??? This probably should use XSTR (XEXP (DECL_RTL (decl), 0), 0) instead - of DECL_ASSEMBLER_NAME. */ - const char *asmname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); - char *newsym; + unsigned HOST_WIDE_INT total = 0; + const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl)); + char *new_str; tree type = TREE_TYPE (decl); - tree arg; - function_args_iterator args_iter; if (prototype_p (type)) { - /* These attributes are ignored for variadic functions in - i386.c:ix86_return_pops_args. For compatibility with MS - compiler do not add @0 suffix here. */ + tree arg; + function_args_iterator args_iter; + + /* This attribute is ignored for variadic functions. */ if (stdarg_p (type)) return NULL_TREE; @@ -61,50 +58,50 @@ gen_stdcall_or_fastcall_decoration (tree decl, char prefix) by convert_arguments in c-typeck.c or cp/typeck.c. */ FOREACH_FUNCTION_ARGS(type, arg, args_iter) { - unsigned parm_size; + HOST_WIDE_INT parm_size; + unsigned HOST_WIDE_INT parm_boundary_bytes; if (! COMPLETE_TYPE_P (arg)) break; - parm_size = int_size_in_bytes (TYPE_SIZE (arg)); + parm_size = int_size_in_bytes (arg); if (parm_size < 0) break; + parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT; + /* Must round up to include padding. This is done the same way as in store_one_arg. */ - parm_size = ((parm_size + PARM_BOUNDARY - 1) - / PARM_BOUNDARY * PARM_BOUNDARY); - total += parm_size; + total += (parm_size + parm_boundary_bytes - 1) + / parm_boundary_bytes * parm_boundary_bytes; } } - newsym = alloca (1 + strlen (asmname) + 1 + 10 + 1); - return get_identifier_with_length (newsym, - sprintf (newsym, - "%c%s@%u", - prefix, - asmname, - total / BITS_PER_UNIT)); + new_str = alloca (1 + strlen (old_str) + 1 + 10 + 1); + sprintf (new_str, "%c%s@" HOST_WIDE_INT_PRINT_UNSIGNED, + prefix, old_str, total); + + return get_identifier (new_str); } -/* Return string which is the former assembler name modified with an - _n@ prefix where n represents the number of arguments passed in - registers */ +/* Return string which is the function name, identified by ID, modified + with an _n@ prefix (where n represents the number of arguments passed in + registers). If ID is NULL use the DECL_NAME as base. + Return NULL if no change required. */ static tree -gen_regparm_prefix (tree decl, unsigned nregs) +gen_regparm_prefix (tree decl, tree id, unsigned int nregs) { - unsigned total = 0; - /* ??? This probably should use XSTR (XEXP (DECL_RTL (decl), 0), 0) instead - of DECL_ASSEMBLER_NAME. */ - const char *asmname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); - char *newsym; + unsigned HOST_WIDE_INT total = 0; + const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl)); + char *new_str; tree type = TREE_TYPE (decl); - tree arg; - function_args_iterator args_iter; if (prototype_p (type)) { + tree arg; + function_args_iterator args_iter; + /* This attribute is ignored for variadic functions. */ if (stdarg_p (type)) return NULL_TREE; @@ -113,7 +110,8 @@ gen_regparm_prefix (tree decl, unsigned nregs) by convert_arguments in c-typeck.c or cp/typeck.c. */ FOREACH_FUNCTION_ARGS(type, arg, args_iter) { - unsigned parm_size; + HOST_WIDE_INT parm_size; + unsigned HOST_WIDE_INT parm_boundary_bytes; if (! COMPLETE_TYPE_P (arg)) break; @@ -122,21 +120,58 @@ gen_regparm_prefix (tree decl, unsigned nregs) if (parm_size < 0) break; - parm_size = ((parm_size + PARM_BOUNDARY - 1) - / PARM_BOUNDARY * PARM_BOUNDARY); - total += parm_size; + parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT; + + /* Must round up to include padding. This is done the same + way as in store_one_arg. */ + total += (parm_size + parm_boundary_bytes - 1) + / parm_boundary_bytes * parm_boundary_bytes; } } - if (nregs > total / BITS_PER_WORD) - nregs = total / BITS_PER_WORD; + if (nregs > total / UNITS_PER_WORD) + nregs = total / UNITS_PER_WORD; gcc_assert (nregs <= 9); - newsym = alloca (3 + strlen (asmname) + 1); - return get_identifier_with_length (newsym, - sprintf (newsym, - "_%u@%s", - nregs, - asmname)); + new_str = alloca (3 + strlen (old_str) + 1); + sprintf (new_str, "_%u@%s", nregs, old_str); + + return get_identifier (new_str); +} + +/* Maybe decorate and get a new identifier for the DECL of a stdcall or + fastcall function. The original identifier is supplied in ID. */ + +static tree +i386_nlm_maybe_mangle_decl_assembler_name (tree decl, tree id) +{ + tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + tree new_id; + + if (lookup_attribute ("stdcall", type_attributes)) + new_id = gen_stdcall_or_fastcall_decoration (decl, id, '_'); + else if (lookup_attribute ("fastcall", type_attributes)) + new_id = gen_stdcall_or_fastcall_decoration (decl, id, FASTCALL_PREFIX); + else if ((new_id = lookup_attribute ("regparm", type_attributes))) + new_id = gen_regparm_prefix (decl, id, + TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (new_id)))); + else + new_id = NULL_TREE; + + return new_id; +} + +/* This is used as a target hook to modify the DECL_ASSEMBLER_NAME + in the language-independent default hook + langhooks.c:lhd_set_decl_assembler_name () + and in cp/mangle.c:mangle_decl (). */ +tree +i386_nlm_mangle_decl_assembler_name (tree decl, tree id) +{ + tree new_id = TREE_CODE (decl) == FUNCTION_DECL + ? i386_nlm_maybe_mangle_decl_assembler_name (decl, id) + : NULL_TREE; + + return (new_id ? new_id : id); } void @@ -146,31 +181,28 @@ i386_nlm_encode_section_info (tree decl, rtx rtl, int first) if (first && TREE_CODE (decl) == FUNCTION_DECL + /* Do not change the identifier if a verbatim asmspec + or if stdcall suffix already added. */ && *IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) != '*' && !strchr (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), '@')) { - tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl)); - tree newid; - - if (lookup_attribute ("stdcall", type_attributes)) - newid = gen_stdcall_or_fastcall_decoration (decl, '_'); - else if (lookup_attribute ("fastcall", type_attributes)) - newid = gen_stdcall_or_fastcall_decoration (decl, FASTCALL_PREFIX); - else if ((newid = lookup_attribute ("regparm", type_attributes)) != NULL_TREE) - newid = gen_regparm_prefix (decl, - TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (newid)))); - if (newid != NULL_TREE) - { - rtx rtlname = XEXP (rtl, 0); + /* FIXME: In Ada, and perhaps other language frontends, + imported stdcall names may not yet have been modified. + Check and do it know. */ + rtx symbol = XEXP (rtl, 0); + tree new_id; + tree old_id = DECL_ASSEMBLER_NAME (decl); + + gcc_assert (GET_CODE (symbol) == SYMBOL_REF); - if (GET_CODE (rtlname) == MEM) - rtlname = XEXP (rtlname, 0); - XSTR (rtlname, 0) = IDENTIFIER_POINTER (newid); + if ((new_id = i386_nlm_maybe_mangle_decl_assembler_name (decl, old_id))) + { /* These attributes must be present on first declaration, change_decl_assembler_name will warn if they are added later and the decl has been referenced, but duplicate_decls - should catch the mismatch before this is called. */ - change_decl_assembler_name (decl, newid); + should catch the mismatch first. */ + change_decl_assembler_name (decl, new_id); + XSTR (symbol, 0) = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); } } } @@ -201,3 +233,24 @@ i386_nlm_strip_name_encoding (const char *str) } return name; } + +/* Sometimes certain combinations of command options do not make + sense on a particular target machine. You can define a macro + `OVERRIDE_OPTIONS' to take account of this. This macro, if + defined, is executed once just after all the command options have + been parsed. + + Don't use this macro to turn on various extra optimizations for + `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ + +void +netware_override_options (void) +{ + override_options (); + + if (flag_pic) + { + error ("-fPIC and -fpic are not supported for this target"); + flag_pic = 0; + } +} diff --git a/gcc/config/i386/netware.h b/gcc/config/i386/netware.h index e7459a2fe79..d4f31e0bbb2 100644 --- a/gcc/config/i386/netware.h +++ b/gcc/config/i386/netware.h @@ -72,6 +72,18 @@ along with GCC; see the file COPYING3. If not see #define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | \ MASK_FLOAT_RETURNS | MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT) +/* Sometimes certain combinations of command options do not make + sense on a particular target machine. You can define a macro + `OVERRIDE_OPTIONS' to take account of this. This macro, if + defined, is executed once just after all the command options have + been parsed. + + Don't use this macro to turn on various extra optimizations for + `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ +#undef OVERRIDE_OPTIONS +extern void netware_override_options (void); +#define OVERRIDE_OPTIONS netware_override_options () + #undef MATH_LIBRARY #define MATH_LIBRARY "" @@ -142,13 +154,15 @@ along with GCC; see the file COPYING3. If not see function named by the symbol (such as what section it is in). On i386 running NetWare, modify the assembler name with an underscore (_) - prefix and a suffix consisting of an atsign (@) followed by a string of - digits that represents the number of bytes of arguments passed to the - function, if it has the attribute STDCALL. Alternatively, if it has the - REGPARM attribute, prefix it with an underscore (_), a digit representing - the number of registers used, and an atsign (@). */ + or atsign (@) prefix and a suffix consisting of an atsign (@) followed by + a string of digits that represents the number of bytes of arguments passed + to the function, if it has the attribute STDCALL. Alternatively, if it has + the REGPARM attribute, prefix it with an underscore (_), a digit + representing the number of registers used, and an atsign (@). */ void i386_nlm_encode_section_info (tree, rtx, int); +extern tree i386_nlm_mangle_decl_assembler_name (tree, tree); const char *i386_nlm_strip_name_encoding (const char *); #define SUBTARGET_ENCODE_SECTION_INFO i386_nlm_encode_section_info +#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_nlm_mangle_decl_assembler_name #undef TARGET_STRIP_NAME_ENCODING #define TARGET_STRIP_NAME_ENCODING i386_nlm_strip_name_encoding diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 43f7ced8c03..97250dbd2ed 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -36,6 +36,9 @@ (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF]) (define_mode_iterator SSEMODEF2P [V4SF V2DF]) +;; Mapping from float mode to required SSE level +(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) + ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) @@ -137,12 +140,12 @@ gcc_unreachable (); }) -(define_expand "movv4sf" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - (match_operand:V4SF 1 "nonimmediate_operand" ""))] +(define_expand "mov<mode>" + [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))] "TARGET_SSE" { - ix86_expand_vector_move (V4SFmode, operands); + ix86_expand_vector_move (<MODE>mode, operands); DONE; }) @@ -181,15 +184,6 @@ operands[2] = CONST0_RTX (V4SFmode); }) -(define_expand "movv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "") - (match_operand:V2DF 1 "nonimmediate_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move (V2DFmode, operands); - DONE; -}) - (define_insn "*movv2df_internal" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))] @@ -249,23 +243,16 @@ DONE; }) -(define_insn "sse_movups" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "movups\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_movupd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "movupd\t{%1, %0|%0, %1}" +(define_insn "<sse>_movup<ssemodesuffixf2c>" + [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) (define_insn "sse2_movdqu" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") @@ -277,23 +264,15 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "sse_movntv4sf" - [(set (match_operand:V4SF 0 "memory_operand" "=m") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE" - "movntps\t{%1, %0|%0, %1}" +(define_insn "<sse>_movnt<mode>" + [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) - -(define_insn "sse2_movntv2df" - [(set (match_operand:V2DF 0 "memory_operand" "=m") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movntpd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) (define_insn "sse2_movntv2di" [(set (match_operand:V2DI 0 "memory_operand" "=m") @@ -328,18 +307,20 @@ ; that directly map to insns are defined; it would be possible to ; define patterns for other modes that would expand to several insns. -(define_expand "storentv4sf" - [(set (match_operand:V4SF 0 "memory_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")] - UNSPEC_MOVNT))] - "TARGET_SSE" +(define_expand "storent<mode>" + [(set (match_operand:SSEMODEF2P 0 "memory_operand" "") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "register_operand" "")] + UNSPEC_MOVNT))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" "") -(define_expand "storentv2df" - [(set (match_operand:V2DF 0 "memory_operand" "") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "")] - UNSPEC_MOVNT))] - "TARGET_SSE2" +(define_expand "storent<mode>" + [(set (match_operand:MODEF 0 "memory_operand" "") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "")] + UNSPEC_MOVNT))] + "TARGET_SSE4A" "") (define_expand "storentv2di" @@ -356,121 +337,120 @@ "TARGET_SSE2" "") -(define_expand "storentdf" - [(set (match_operand:DF 0 "memory_operand" "") - (unspec:DF [(match_operand:DF 1 "register_operand" "")] - UNSPEC_MOVNT))] - "TARGET_SSE4A" - "") - -(define_expand "storentsf" - [(set (match_operand:SF 0 "memory_operand" "") - (unspec:SF [(match_operand:SF 1 "register_operand" "")] - UNSPEC_MOVNT))] - "TARGET_SSE4A" - "") - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; Parallel single-precision floating point arithmetic +;; Parallel floating point arithmetic ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_expand "negv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "") - (neg:V4SF (match_operand:V4SF 1 "register_operand" "")))] - "TARGET_SSE" - "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;") +(define_expand "neg<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (neg:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_expand_fp_absneg_operator (NEG, <MODE>mode, operands); DONE;") -(define_expand "absv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "") - (abs:V4SF (match_operand:V4SF 1 "register_operand" "")))] - "TARGET_SSE" - "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;") +(define_expand "abs<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (abs:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;") -(define_expand "addv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);") +(define_expand "add<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (plus:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);") -(define_insn "*addv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)" - "addps\t{%2, %0|%0, %2}" +(define_insn "*add<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (plus:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "addp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse_vmaddv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) +(define_insn "<sse>_vmadd<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (plus:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) (match_dup 1) (const_int 1)))] - "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)" - "addss\t{%2, %0|%0, %2}" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && ix86_binary_operator_ok (PLUS, V4SFmode, operands)" + "adds<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) + (set_attr "mode" "<ssescalarmode>")]) -(define_expand "subv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (minus:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);") +(define_expand "sub<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (minus:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);") -(define_insn "*subv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "subps\t{%2, %0|%0, %2}" +(define_insn "*sub<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (minus:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "subp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse_vmsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) +(define_insn "<sse>_vmsub<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (minus:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) (match_dup 1) (const_int 1)))] - "TARGET_SSE" - "subss\t{%2, %0|%0, %2}" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "subs<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) + (set_attr "mode" "<ssescalarmode>")]) -(define_expand "mulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);") +(define_expand "mul<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (mult:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") -(define_insn "*mulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)" - "mulps\t{%2, %0|%0, %2}" +(define_insn "*mul<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (mult:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" + "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "ssemul") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse_vmmulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) +(define_insn "<sse>_vmmul<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (mult:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) (match_dup 1) (const_int 1)))] - "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)" - "mulss\t{%2, %0|%0, %2}" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" + "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "ssemul") - (set_attr "mode" "SF")]) + (set_attr "mode" "<ssescalarmode>")]) (define_expand "divv4sf3" [(set (match_operand:V4SF 0 "register_operand" "") @@ -490,31 +470,40 @@ } }) -(define_insn "sse_divv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "divps\t{%2, %0|%0, %2}" +(define_expand "divv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (div:V2DF (match_operand:V2DF 1 "register_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);") + +(define_insn "<sse>_div<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (div:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "ssediv") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse_vmdivv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) +(define_insn "<sse>_vmdiv<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (div:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) (match_dup 1) (const_int 1)))] - "TARGET_SSE" - "divss\t{%2, %0|%0, %2}" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "ssediv") - (set_attr "mode" "SF")]) + (set_attr "mode" "<ssescalarmode>")]) (define_insn "sse_rcpv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] "TARGET_SSE" "rcpps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") @@ -532,6 +521,48 @@ [(set_attr "type" "sse") (set_attr "mode" "SF")]) +(define_expand "sqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "") + (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))] + "TARGET_SSE" +{ + if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0); + DONE; + } +}) + +(define_insn "sse_sqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "sqrtps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "sqrtv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "sqrtpd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V2DF")]) + +(define_insn "<sse>_vmsqrt<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (sqrt:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")) + (match_operand:SSEMODEF2P 2 "register_operand" "0") + (const_int 1)))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "<ssescalarmode>")]) + (define_expand "rsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "") (unspec:V4SF @@ -563,126 +594,101 @@ [(set_attr "type" "sse") (set_attr "mode" "SF")]) -(define_expand "sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))] - "TARGET_SSE" -{ - if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size - && flag_finite_math_only && !flag_trapping_math - && flag_unsafe_math_optimizations) - { - ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0); - DONE; - } -}) - -(define_insn "sse_sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "sqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_vmsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX ;; isn't really correct, as those rtl operators aren't defined when ;; applied to NaNs. Hopefully the optimizers won't get too smart on us. -(define_expand "smaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" +(define_expand "smin<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (smin:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" { if (!flag_finite_math_only) - operands[1] = force_reg (V4SFmode, operands[1]); - ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands); + operands[1] = force_reg (<MODE>mode, operands[1]); + ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands); }) -(define_insn "*smaxv4sf3_finite" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && flag_finite_math_only - && ix86_binary_operator_ok (SMAX, V4SFmode, operands)" - "maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) +(define_insn "*smin<mode>3_finite" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (smin:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only + && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)" + "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "<MODE>")]) -(define_insn "*smaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) +(define_insn "*smin<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (smin:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "<MODE>")]) -(define_insn "sse_vmsmaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) +(define_insn "<sse>_vmsmin<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (smin:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) (match_dup 1) (const_int 1)))] - "TARGET_SSE" - "maxss\t{%2, %0|%0, %2}" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "mins<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sse") - (set_attr "mode" "SF")]) + (set_attr "mode" "<ssescalarmode>")]) -(define_expand "sminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" +(define_expand "smax<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (smax:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" { if (!flag_finite_math_only) - operands[1] = force_reg (V4SFmode, operands[1]); - ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands); + operands[1] = force_reg (<MODE>mode, operands[1]); + ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands); }) -(define_insn "*sminv4sf3_finite" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && flag_finite_math_only - && ix86_binary_operator_ok (SMIN, V4SFmode, operands)" - "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) +(define_insn "*smax<mode>3_finite" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (smax:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only + && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)" + "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "<MODE>")]) -(define_insn "*sminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) +(define_insn "*smax<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (smax:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "<MODE>")]) -(define_insn "sse_vmsminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) +(define_insn "<sse>_vmsmax<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (smax:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "maxs<ssemodesuffixf2c>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "<ssescalarmode>")]) ;; These versions of the min/max patterns implement exactly the operations ;; min = (op1 < op2 ? op1 : op2) @@ -690,45 +696,27 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "*ieee_sminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE" - "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "*ieee_smaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE" - "maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "*ieee_sminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE2" - "minpd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smin<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "*ieee_smaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE2" - "maxpd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smax<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) (define_insn "sse3_addsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -744,6 +732,19 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "V4SF")]) +(define_insn "sse3_addsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (plus:V2DF + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (minus:V2DF (match_dup 1) (match_dup 2)) + (const_int 1)))] + "TARGET_SSE3" + "addsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + (define_insn "sse3_haddv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_concat:V4SF @@ -771,6 +772,24 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "V4SF")]) +(define_insn "sse3_haddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (plus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) + (plus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3" + "haddpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + (define_insn "sse3_hsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_concat:V4SF @@ -798,6 +817,24 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "V4SF")]) +(define_insn "sse3_hsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (minus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) + (minus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3" + "hsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + (define_expand "reduc_splus_v4sf" [(match_operand:V4SF 0 "register_operand" "") (match_operand:V4SF 1 "register_operand" "")] @@ -814,6 +851,15 @@ DONE; }) +(define_expand "reduc_splus_v2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "register_operand" "")] + "TARGET_SSE3" +{ + emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1])); + DONE; +}) + (define_expand "reduc_smax_v4sf" [(match_operand:V4SF 0 "register_operand" "") (match_operand:V4SF 1 "register_operand" "")] @@ -834,80 +880,71 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; Parallel single-precision floating point comparisons +;; Parallel floating point comparisons ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "sse_maskcmpv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (match_operator:V4SF 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE && !TARGET_SSE5" - "cmp%D3ps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_maskcmpsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 3 "sse_comparison_operator" - [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE && !TARGET_SSE5" - "cmp%D3ss\t{%2, %0|%0, %2}" +(define_insn "<sse>_maskcmp<mode>3" + [(set (match_operand:SSEMODEF4 0 "register_operand" "=x") + (match_operator:SSEMODEF4 3 "sse_comparison_operator" + [(match_operand:SSEMODEF4 1 "register_operand" "0") + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] + "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode)) + && !TARGET_SSE5" + "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse_vmmaskcmpv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operator:V4SF 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")]) +(define_insn "<sse>_vmmaskcmp<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (match_operator:SSEMODEF2P 3 "sse_comparison_operator" + [(match_operand:SSEMODEF2P 1 "register_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) (match_dup 1) (const_int 1)))] - "TARGET_SSE && !TARGET_SSE5" - "cmp%D3ss\t{%2, %0|%0, %2}" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5" + "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) + (set_attr "mode" "<ssescalarmode>")]) -(define_insn "sse_comi" +(define_insn "<sse>_comi" [(set (reg:CCFP FLAGS_REG) (compare:CCFP - (vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") + (vec_select:MODEF + (match_operand:<ssevecmode> 0 "register_operand" "x") (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (vec_select:MODEF + (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm") (parallel [(const_int 0)]))))] - "TARGET_SSE" - "comiss\t{%1, %0|%0, %1}" + "SSE_FLOAT_MODE_P (<MODE>mode)" + "comis<ssemodefsuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") - (set_attr "mode" "SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse_ucomi" +(define_insn "<sse>_ucomi" [(set (reg:CCFPU FLAGS_REG) (compare:CCFPU - (vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") + (vec_select:MODEF + (match_operand:<ssevecmode> 0 "register_operand" "x") (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (vec_select:MODEF + (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm") (parallel [(const_int 0)]))))] - "TARGET_SSE" - "ucomiss\t{%1, %0|%0, %1}" + "SSE_FLOAT_MODE_P (<MODE>mode)" + "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") - (set_attr "mode" "SF")]) + (set_attr "mode" "<MODE>")]) -(define_expand "vcondv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (if_then_else:V4SF +(define_expand "vcond<mode>" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (if_then_else:SSEMODEF2P (match_operator 3 "" - [(match_operand:V4SF 4 "nonimmediate_operand" "") - (match_operand:V4SF 5 "nonimmediate_operand" "")]) - (match_operand:V4SF 1 "general_operand" "") - (match_operand:V4SF 2 "general_operand" "")))] - "TARGET_SSE" + [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")]) + (match_operand:SSEMODEF2P 1 "general_operand" "") + (match_operand:SSEMODEF2P 2 "general_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" { if (ix86_expand_fp_vcond (operands)) DONE; @@ -917,666 +954,123 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; Parallel single-precision floating point logical operations +;; Parallel floating point logical operations ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_expand "andv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);") +(define_expand "and<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (and:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);") -(define_insn "*andv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)" - "andps\t{%2, %0|%0, %2}" +(define_insn "*and<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (and:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && ix86_binary_operator_ok (AND, V4SFmode, operands)" + "andp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse_nandv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0")) - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" +(define_insn "<sse>_nand<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (and:SSEMODEF2P + (not:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "register_operand" "0")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) -(define_expand "iorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);") +(define_expand "ior<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (ior:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);") -(define_insn "*iorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)" - "orps\t{%2, %0|%0, %2}" +(define_insn "*ior<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (ior:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && ix86_binary_operator_ok (IOR, <MODE>mode, operands)" + "orp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) -(define_expand "xorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);") +(define_expand "xor<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "") + (xor:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);") -(define_insn "*xorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)" - "xorps\t{%2, %0|%0, %2}" +(define_insn "*xor<mode>3" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (xor:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode) + && ix86_binary_operator_ok (XOR, <MODE>mode, operands)" + "xorp<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) ;; Also define scalar versions. These are used for abs, neg, and ;; conditional move. Using subregs into vector modes causes register ;; allocation lossage. These patterns do not allow memory operands ;; because the native instructions read the full 128-bits. -(define_insn "*andsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (and:SF (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "register_operand" "x")))] - "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" +(define_insn "*and<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (and:MODEF + (match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "register_operand" "x")))] + "SSE_FLOAT_MODE_P (<MODE>mode)" + "andp<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_insn "*nandsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (and:SF (not:SF (match_operand:SF 1 "register_operand" "0")) - (match_operand:SF 2 "register_operand" "x")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" + (set_attr "mode" "<ssevecmode>")]) + +(define_insn "*nand<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (and:MODEF + (not:MODEF + (match_operand:MODEF 1 "register_operand" "0")) + (match_operand:MODEF 2 "register_operand" "x")))] + "SSE_FLOAT_MODE_P (<MODE>mode)" + "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_insn "*iorsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (ior:SF (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "register_operand" "x")))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" + (set_attr "mode" "<ssevecmode>")]) + +(define_insn "*ior<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (ior:MODEF + (match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "register_operand" "x")))] + "SSE_FLOAT_MODE_P (<MODE>mode)" + "orp<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_insn "*xorsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (xor:SF (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "register_operand" "x")))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" + (set_attr "mode" "<ssevecmode>")]) + +(define_insn "*xor<mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (xor:MODEF + (match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "register_operand" "x")))] + "SSE_FLOAT_MODE_P (<MODE>mode)" + "xorp<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; Parallel single-precision floating point conversion operations -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define_insn "sse_cvtpi2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_duplicate:V4SF - (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) - (match_operand:V4SF 1 "register_operand" "0") - (const_int 3)))] - "TARGET_SSE" - "cvtpi2ps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_cvtps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX_NOTRUNC) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvtps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "unit" "mmx") - (set_attr "mode" "DI")]) - -(define_insn "sse_cvttps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvttps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "unit" "mmx") - (set_attr "mode" "SF")]) - -(define_insn "sse_cvtsi2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF - (vec_duplicate:V4SF - (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m"))) - (match_operand:V4SF 1 "register_operand" "0,0") - (const_int 1)))] - "TARGET_SSE" - "cvtsi2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "amdfam10_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "sse_cvtsi2ssq" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF - (vec_duplicate:V4SF - (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) - (match_operand:V4SF 1 "register_operand" "0,0") - (const_int 1)))] - "TARGET_SSE && TARGET_64BIT" - "cvtsi2ssq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "amdfam10_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "sse_cvtss2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI - [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") - (parallel [(const_int 0)]))] - UNSPEC_FIX_NOTRUNC))] - "TARGET_SSE" - "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "prefix_rep" "1") - (set_attr "mode" "SI")]) - -(define_insn "sse_cvtss2si_2" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] - UNSPEC_FIX_NOTRUNC))] - "TARGET_SSE" - "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double") - (set_attr "prefix_rep" "1") - (set_attr "mode" "SI")]) - -(define_insn "sse_cvtss2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (unspec:DI - [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") - (parallel [(const_int 0)]))] - UNSPEC_FIX_NOTRUNC))] - "TARGET_SSE && TARGET_64BIT" - "cvtss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "prefix_rep" "1") - (set_attr "mode" "DI")]) - -(define_insn "sse_cvtss2siq_2" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] - UNSPEC_FIX_NOTRUNC))] - "TARGET_SSE && TARGET_64BIT" - "cvtss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double") - (set_attr "prefix_rep" "1") - (set_attr "mode" "DI")]) - -(define_insn "sse_cvttss2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI - (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") - (parallel [(const_int 0)]))))] - "TARGET_SSE" - "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double") - (set_attr "prefix_rep" "1") - (set_attr "mode" "SI")]) - -(define_insn "sse_cvttss2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI - (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") - (parallel [(const_int 0)]))))] - "TARGET_SSE && TARGET_64BIT" - "cvttss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double") - (set_attr "prefix_rep" "1") - (set_attr "mode" "DI")]) - -(define_insn "sse2_cvtdq2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvtdq2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse2_cvtps2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX_NOTRUNC))] - "TARGET_SSE2" - "cvtps2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "prefix_data16" "1") - (set_attr "mode" "TI")]) - -(define_insn "sse2_cvttps2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvttps2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "prefix_rep" "1") - (set_attr "mode" "TI")]) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; Parallel single-precision floating point element swizzling -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define_insn "sse_movhlps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0") - (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x")) - (parallel [(const_int 6) - (const_int 7) - (const_int 2) - (const_int 3)])))] - "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "@ - movhlps\t{%2, %0|%0, %2} - movlps\t{%H2, %0|%0, %H2} - movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF,V2SF,V2SF")]) - -(define_insn "sse_movlhps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0") - (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x")) - (parallel [(const_int 0) - (const_int 1) - (const_int 4) - (const_int 5)])))] - "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" - "@ - movlhps\t{%2, %0|%0, %2} - movhps\t{%2, %0|%0, %2} - movlps\t{%2, %H0|%H0, %2}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF,V2SF,V2SF")]) - -(define_insn "sse_unpckhps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (parallel [(const_int 2) (const_int 6) - (const_int 3) (const_int 7)])))] - "TARGET_SSE" - "unpckhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_unpcklps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5)])))] - "TARGET_SSE" - "unpcklps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -;; These are modeled with the same vec_concat as the others so that we -;; capture users of shufps that can use the new instructions -(define_insn "sse3_movshdup" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "nonimmediate_operand" "xm") - (match_dup 1)) - (parallel [(const_int 1) - (const_int 1) - (const_int 7) - (const_int 7)])))] - "TARGET_SSE3" - "movshdup\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "prefix_rep" "1") - (set_attr "mode" "V4SF")]) - -(define_insn "sse3_movsldup" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "nonimmediate_operand" "xm") - (match_dup 1)) - (parallel [(const_int 0) - (const_int 0) - (const_int 6) - (const_int 6)])))] - "TARGET_SSE3" - "movsldup\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "prefix_rep" "1") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_shufps" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "") - (match_operand:SI 3 "const_int_operand" "")] - "TARGET_SSE" -{ - int mask = INTVAL (operands[3]); - emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2], - GEN_INT ((mask >> 0) & 3), - GEN_INT ((mask >> 2) & 3), - GEN_INT (((mask >> 4) & 3) + 4), - GEN_INT (((mask >> 6) & 3) + 4))); - DONE; -}) - -(define_insn "sse_shufps_1" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (parallel [(match_operand 3 "const_0_to_3_operand" "") - (match_operand 4 "const_0_to_3_operand" "") - (match_operand 5 "const_4_to_7_operand" "") - (match_operand 6 "const_4_to_7_operand" "")])))] - "TARGET_SSE" -{ - int mask = 0; - mask |= INTVAL (operands[3]) << 0; - mask |= INTVAL (operands[4]) << 2; - mask |= (INTVAL (operands[5]) - 4) << 4; - mask |= (INTVAL (operands[6]) - 4) << 6; - operands[3] = GEN_INT (mask); - - return "shufps\t{%3, %2, %0|%0, %2, %3}"; -} - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_storehps" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") - (parallel [(const_int 2) (const_int 3)])))] - "TARGET_SSE" - "@ - movhps\t{%1, %0|%0, %1} - movhlps\t{%1, %0|%0, %1} - movlps\t{%H1, %0|%0, %H1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2SF,V4SF,V2SF")]) - -(define_insn "sse_loadhps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") - (vec_concat:V4SF - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0") - (parallel [(const_int 0) (const_int 1)])) - (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))] - "TARGET_SSE" - "@ - movhps\t{%2, %0|%0, %2} - movlhps\t{%2, %0|%0, %2} - movlps\t{%2, %H0|%H0, %2}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2SF,V4SF,V2SF")]) - -(define_insn "sse_storelps" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m") - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "@ - movlps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2SF,V4SF,V2SF")]) - -(define_insn "sse_loadlps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") - (vec_concat:V4SF - (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x") - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0") - (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_SSE" - "@ - shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} - movlps\t{%2, %0|%0, %2} - movlps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog,ssemov,ssemov") - (set_attr "mode" "V4SF,V2SF,V2SF")]) - -(define_insn "sse_movss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 2 "register_operand" "x") - (match_operand:V4SF 1 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "movss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) - -(define_insn "*vec_dupv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_duplicate:V4SF - (match_operand:SF 1 "register_operand" "0")))] - "TARGET_SSE" - "shufps\t{$0, %0, %0|%0, %0, 0}" - [(set_attr "type" "sselog1") - (set_attr "mode" "V4SF")]) - -;; ??? In theory we can match memory for the MMX alternative, but allowing -;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE -;; alternatives pretty much forces the MMX alternative to be chosen. -(define_insn "*sse_concatv2sf" - [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y") - (vec_concat:V2SF - (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m") - (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))] - "TARGET_SSE" - "@ - unpcklps\t{%2, %0|%0, %2} - movss\t{%1, %0|%0, %1} - punpckldq\t{%2, %0|%0, %2} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") - (set_attr "mode" "V4SF,SF,DI,DI")]) - -(define_insn "*sse_concatv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_concat:V4SF - (match_operand:V2SF 1 "register_operand" " 0,0") - (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))] - "TARGET_SSE" - "@ - movlhps\t{%2, %0|%0, %2} - movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF,V2SF")]) - -(define_expand "vec_initv4sf" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand 1 "" "")] - "TARGET_SSE" -{ - ix86_expand_vector_init (false, operands[0], operands[1]); - DONE; -}) - -(define_insn "vec_setv4sf_0" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m") - (vec_merge:V4SF - (vec_duplicate:V4SF - (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) - (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0") - (const_int 1)))] - "TARGET_SSE" - "@ - movss\t{%2, %0|%0, %2} - movss\t{%2, %0|%0, %2} - movd\t{%2, %0|%0, %2} - #" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) - -;; A subset is vec_setv4sf. -(define_insn "*vec_setv4sf_sse4_1" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_duplicate:V4SF - (match_operand:SF 2 "nonimmediate_operand" "xm")) - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))] - "TARGET_SSE4_1" -{ - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); - return "insertps\t{%3, %2, %0|%0, %2, %3}"; -} - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) - -(define_insn "sse4_1_insertps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:SI 3 "const_0_to_255_operand" "n")] - UNSPEC_INSERTPS))] - "TARGET_SSE4_1" - "insertps\t{%3, %2, %0|%0, %2, %3}"; - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) - -(define_split - [(set (match_operand:V4SF 0 "memory_operand" "") - (vec_merge:V4SF - (vec_duplicate:V4SF - (match_operand:SF 1 "nonmemory_operand" "")) - (match_dup 0) - (const_int 1)))] - "TARGET_SSE && reload_completed" - [(const_int 0)] -{ - emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]); - DONE; -}) - -(define_expand "vec_setv4sf" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand:SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE" -{ - ix86_expand_vector_set (false, operands[0], operands[1], - INTVAL (operands[2])); - DONE; -}) - -(define_insn_and_split "*vec_extractv4sf_0" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr") - (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m") - (parallel [(const_int 0)])))] - "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx op1 = operands[1]; - if (REG_P (op1)) - op1 = gen_rtx_REG (SFmode, REGNO (op1)); - else - op1 = gen_lowpart (SFmode, op1); - emit_move_insn (operands[0], op1); - DONE; -}) - -(define_insn "*sse4_1_extractps" - [(set (match_operand:SF 0 "nonimmediate_operand" "=rm") - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] - "TARGET_SSE4_1" - "extractps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) - -(define_insn_and_split "*vec_extract_v4sf_mem" - [(set (match_operand:SF 0 "register_operand" "=x*rf") - (vec_select:SF - (match_operand:V4SF 1 "memory_operand" "o") - (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))] - "" - "#" - "reload_completed" - [(const_int 0)] -{ - int i = INTVAL (operands[2]); - - emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4)); - DONE; -}) - -(define_expand "vec_extractv4sf" - [(match_operand:SF 0 "register_operand" "") - (match_operand:V4SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE" -{ - ix86_expand_vector_extract (false, operands[0], operands[1], - INTVAL (operands[2])); - DONE; -}) + (set_attr "mode" "<ssevecmode>")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -2137,485 +1631,179 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; Parallel double-precision floating point arithmetic +;; Parallel single-precision floating point conversion operations ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_expand "negv2df2" - [(set (match_operand:V2DF 0 "register_operand" "") - (neg:V2DF (match_operand:V2DF 1 "register_operand" "")))] - "TARGET_SSE2" - "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;") - -(define_expand "absv2df2" - [(set (match_operand:V2DF 0 "register_operand" "") - (abs:V2DF (match_operand:V2DF 1 "register_operand" "")))] - "TARGET_SSE2" - "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;") - -(define_expand "addv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);") - -(define_insn "*addv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)" - "addpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_vmaddv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V4SFmode, operands)" - "addsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_expand "subv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);") - -(define_insn "*subv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "subpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_vmsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "subsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_expand "mulv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);") - -(define_insn "*mulv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)" - "mulpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_vmmulv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)" - "mulsd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "DF")]) - -(define_expand "divv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (div:V2DF (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);") - -(define_insn "*divv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (div:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "divpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_vmdivv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (div:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "divsd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "DF")]) - -(define_insn "sqrtv2df2" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "sqrtpd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_vmsqrtv2df2" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")) - (match_operand:V2DF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE2" - "sqrtsd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "DF")]) - -;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX -;; isn't really correct, as those rtl operators aren't defined when -;; applied to NaNs. Hopefully the optimizers won't get too smart on us. - -(define_expand "smaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" -{ - if (!flag_finite_math_only) - operands[1] = force_reg (V2DFmode, operands[1]); - ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands); -}) - -(define_insn "*smaxv2df3_finite" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && flag_finite_math_only - && ix86_binary_operator_ok (SMAX, V2DFmode, operands)" - "maxpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "*smaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "maxpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_vmsmaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "maxsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_expand "sminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" -{ - if (!flag_finite_math_only) - operands[1] = force_reg (V2DFmode, operands[1]); - ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands); -}) +(define_insn "sse_cvtpi2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4SF 1 "register_operand" "0") + (const_int 3)))] + "TARGET_SSE" + "cvtpi2ps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) -(define_insn "*sminv2df3_finite" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && flag_finite_math_only - && ix86_binary_operator_ok (SMIN, V2DFmode, operands)" - "minpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) +(define_insn "sse_cvtps2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "cvtps2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx") + (set_attr "mode" "DI")]) -(define_insn "*sminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "minpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) +(define_insn "sse_cvttps2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "cvttps2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx") + (set_attr "mode" "SF")]) -(define_insn "sse2_vmsminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) +(define_insn "sse_cvtsi2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m"))) + (match_operand:V4SF 1 "register_operand" "0,0") (const_int 1)))] - "TARGET_SSE2" - "minsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + "TARGET_SSE" + "cvtsi2ss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "vector,double") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "mode" "SF")]) -(define_insn "sse3_addsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (plus:V2DF - (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (minus:V2DF (match_dup 1) (match_dup 2)) +(define_insn "sse_cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (match_operand:V4SF 1 "register_operand" "0,0") (const_int 1)))] - "TARGET_SSE3" - "addsubpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "sse3_haddv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (plus:DF - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) - (plus:DF - (vec_select:DF - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] - "TARGET_SSE3" - "haddpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "sse3_hsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (minus:DF - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) - (minus:DF - (vec_select:DF - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] - "TARGET_SSE3" - "hsubpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_expand "reduc_splus_v2df" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand:V2DF 1 "register_operand" "")] - "TARGET_SSE3" -{ - emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1])); - DONE; -}) + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "vector,double") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "mode" "SF")]) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; Parallel double-precision floating point comparisons -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_insn "sse_cvtss2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI + [(vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE" + "cvtss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "prefix_rep" "1") + (set_attr "mode" "SI")]) -(define_insn "sse2_maskcmpv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (match_operator:V2DF 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE2 && !TARGET_SSE5" - "cmp%D3pd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "V2DF")]) +(define_insn "sse_cvtss2si_2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE" + "cvtss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double") + (set_attr "prefix_rep" "1") + (set_attr "mode" "SI")]) -(define_insn "sse2_maskcmpdf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (match_operator:DF 3 "sse_comparison_operator" - [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE2 && !TARGET_SSE5" - "cmp%D3sd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) +(define_insn "sse_cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI + [(vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE && TARGET_64BIT" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "prefix_rep" "1") + (set_attr "mode" "DI")]) -(define_insn "sse2_vmmaskcmpv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (match_operator:V2DF 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")]) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2 && !TARGET_SSE5" - "cmp%D3sd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) +(define_insn "sse_cvtss2siq_2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE && TARGET_64BIT" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double") + (set_attr "prefix_rep" "1") + (set_attr "mode" "DI")]) -(define_insn "sse2_comi" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP - (vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "xm") +(define_insn "sse_cvttss2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "comisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "DF")]) + "TARGET_SSE" + "cvttss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double") + (set_attr "prefix_rep" "1") + (set_attr "mode" "SI")]) -(define_insn "sse2_ucomi" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU - (vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "xm") +(define_insn "sse_cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "ucomisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "DF")]) - -(define_expand "vcondv2df" - [(set (match_operand:V2DF 0 "register_operand" "") - (if_then_else:V2DF - (match_operator 3 "" - [(match_operand:V2DF 4 "nonimmediate_operand" "") - (match_operand:V2DF 5 "nonimmediate_operand" "")]) - (match_operand:V2DF 1 "general_operand" "") - (match_operand:V2DF 2 "general_operand" "")))] - "TARGET_SSE2" -{ - if (ix86_expand_fp_vcond (operands)) - DONE; - else - FAIL; -}) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; Parallel double-precision floating point logical operations -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define_expand "andv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);") - -(define_insn "*andv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_nandv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0")) - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "iorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);") - -(define_insn "*iorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "xorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);") - -(define_insn "*xorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -;; Also define scalar versions. These are used for abs, neg, and -;; conditional move. Using subregs into vector modes causes register -;; allocation lossage. These patterns do not allow memory operands -;; because the native instructions read the full 128-bits. - -(define_insn "*anddf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (and:DF (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "register_operand" "x")))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double") + (set_attr "prefix_rep" "1") + (set_attr "mode" "DI")]) -(define_insn "*nanddf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (and:DF (not:DF (match_operand:DF 1 "register_operand" "0")) - (match_operand:DF 2 "register_operand" "x")))] +(define_insn "sse2_cvtdq2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + "cvtdq2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) -(define_insn "*iordf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (ior:DF (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "register_operand" "x")))] +(define_insn "sse2_cvtps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + "cvtps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) -(define_insn "*xordf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (xor:DF (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "register_operand" "x")))] +(define_insn "sse2_cvttps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] "TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + "cvttps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_rep" "1") + (set_attr "mode" "TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -2698,7 +1886,7 @@ (define_insn "sse2_cvtsd2si_2" [(set (match_operand:SI 0 "register_operand" "=r,r") (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] - UNSPEC_FIX_NOTRUNC))] + UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2" "cvtsd2si\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") @@ -2724,7 +1912,7 @@ (define_insn "sse2_cvtsd2siq_2" [(set (match_operand:DI 0 "register_operand" "=r,r") (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] - UNSPEC_FIX_NOTRUNC))] + UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2 && TARGET_64BIT" "cvtsd2siq\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") @@ -3035,6 +2223,388 @@ DONE; }) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse_movhlps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0") + (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x")) + (parallel [(const_int 6) + (const_int 7) + (const_int 2) + (const_int 3)])))] + "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + movhlps\t{%2, %0|%0, %2} + movlps\t{%H2, %0|%0, %H2} + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF,V2SF,V2SF")]) + +(define_insn "sse_movlhps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0") + (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x")) + (parallel [(const_int 0) + (const_int 1) + (const_int 4) + (const_int 5)])))] + "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" + "@ + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2} + movlps\t{%2, %H0|%H0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF,V2SF,V2SF")]) + +(define_insn "sse_unpckhps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_SSE" + "unpckhps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_unpcklps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_SSE" + "unpcklps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +;; These are modeled with the same vec_concat as the others so that we +;; capture users of shufps that can use the new instructions +(define_insn "sse3_movshdup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (match_dup 1)) + (parallel [(const_int 1) + (const_int 1) + (const_int 7) + (const_int 7)])))] + "TARGET_SSE3" + "movshdup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix_rep" "1") + (set_attr "mode" "V4SF")]) + +(define_insn "sse3_movsldup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (match_dup 1)) + (parallel [(const_int 0) + (const_int 0) + (const_int 6) + (const_int 6)])))] + "TARGET_SSE3" + "movsldup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix_rep" "1") + (set_attr "mode" "V4SF")]) + +(define_expand "sse_shufps" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "TARGET_SSE" +{ + int mask = INTVAL (operands[3]); + emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4))); + DONE; +}) + +(define_insn "sse_shufps_1" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (parallel [(match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_4_to_7_operand" "") + (match_operand 6 "const_4_to_7_operand" "")])))] + "TARGET_SSE" +{ + int mask = 0; + mask |= INTVAL (operands[3]) << 0; + mask |= INTVAL (operands[4]) << 2; + mask |= (INTVAL (operands[5]) - 4) << 4; + mask |= (INTVAL (operands[6]) - 4) << 6; + operands[3] = GEN_INT (mask); + + return "shufps\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_storehps" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_SSE" + "@ + movhps\t{%1, %0|%0, %1} + movhlps\t{%1, %0|%0, %1} + movlps\t{%H1, %0|%0, %H1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "sse_loadhps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") + (vec_concat:V4SF + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))] + "TARGET_SSE" + "@ + movhps\t{%2, %0|%0, %2} + movlhps\t{%2, %0|%0, %2} + movlps\t{%2, %H0|%H0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "sse_storelps" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "@ + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "sse_loadlps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (vec_concat:V4SF + (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x") + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0") + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_SSE" + "@ + shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} + movlps\t{%2, %0|%0, %2} + movlps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "mode" "V4SF,V2SF,V2SF")]) + +(define_insn "sse_movss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 2 "register_operand" "x") + (match_operand:V4SF 1 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "movss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) + +(define_insn "*vec_dupv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_duplicate:V4SF + (match_operand:SF 1 "register_operand" "0")))] + "TARGET_SSE" + "shufps\t{$0, %0, %0|%0, %0, 0}" + [(set_attr "type" "sselog1") + (set_attr "mode" "V4SF")]) + +;; ??? In theory we can match memory for the MMX alternative, but allowing +;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE +;; alternatives pretty much forces the MMX alternative to be chosen. +(define_insn "*sse_concatv2sf" + [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y") + (vec_concat:V2SF + (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m") + (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))] + "TARGET_SSE" + "@ + unpcklps\t{%2, %0|%0, %2} + movss\t{%1, %0|%0, %1} + punpckldq\t{%2, %0|%0, %2} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") + (set_attr "mode" "V4SF,SF,DI,DI")]) + +(define_insn "*sse_concatv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" " 0,0") + (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))] + "TARGET_SSE" + "@ + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF,V2SF")]) + +(define_expand "vec_initv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +(define_insn "vec_setv4sf_0" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m") + (vec_merge:V4SF + (vec_duplicate:V4SF + (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) + (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0") + (const_int 1)))] + "TARGET_SSE" + "@ + movss\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2} + movd\t{%2, %0|%0, %2} + #" + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) + +;; A subset is vec_setv4sf. +(define_insn "*vec_setv4sf_sse4_1" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (match_operand:SF 2 "nonimmediate_operand" "xm")) + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))] + "TARGET_SSE4_1" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); + return "insertps\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_insn "sse4_1_insertps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_INSERTPS))] + "TARGET_SSE4_1" + "insertps\t{%3, %2, %0|%0, %2, %3}"; + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_split + [(set (match_operand:V4SF 0 "memory_operand" "") + (vec_merge:V4SF + (vec_duplicate:V4SF + (match_operand:SF 1 "nonmemory_operand" "")) + (match_dup 0) + (const_int 1)))] + "TARGET_SSE && reload_completed" + [(const_int 0)] +{ + emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]); + DONE; +}) + +(define_expand "vec_setv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_insn_and_split "*vec_extractv4sf_0" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr") + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m") + (parallel [(const_int 0)])))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op1 = operands[1]; + if (REG_P (op1)) + op1 = gen_rtx_REG (SFmode, REGNO (op1)); + else + op1 = gen_lowpart (SFmode, op1); + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_insn "*sse4_1_extractps" + [(set (match_operand:SF 0 "nonimmediate_operand" "=rm") + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] + "TARGET_SSE4_1" + "extractps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_insn_and_split "*vec_extract_v4sf_mem" + [(set (match_operand:SF 0 "register_operand" "=x*rf") + (vec_select:SF + (match_operand:V4SF 1 "memory_operand" "o") + (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))] + "" + "#" + "reload_completed" + [(const_int 0)] +{ + int i = INTVAL (operands[2]); + + emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4)); + DONE; +}) + +(define_expand "vec_extractv4sf" + [(match_operand:SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -5847,23 +5417,15 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "sse_movmskps" +(define_insn "<sse>_movmskp<ssemodesuffixf2c>" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE" - "movmskps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse2_movmskpd" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE2" - "movmskpd\t{%1, %0|%0, %1}" + (unspec:SI + [(match_operand:SSEMODEF2P 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) (define_insn "sse2_pmovmskb" [(set (match_operand:SI 0 "register_operand" "=r") @@ -6017,7 +5579,12 @@ "monitor" [(set_attr "length" "3")]) -;; SSSE3 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; SSSE3 instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (define_insn "ssse3_phaddwv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_concat:V8HI @@ -6536,7 +6103,7 @@ [(set (match_operand:V16QI 0 "register_operand" "=x") (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")] - UNSPEC_PSHUFB))] + UNSPEC_PSHUFB))] "TARGET_SSSE3" "pshufb\t{%2, %0|%0, %2}"; [(set_attr "type" "sselog1") @@ -6548,7 +6115,7 @@ [(set (match_operand:V8QI 0 "register_operand" "=y") (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")] - UNSPEC_PSHUFB))] + UNSPEC_PSHUFB))] "TARGET_SSSE3" "pshufb\t{%2, %0|%0, %2}"; [(set_attr "type" "sselog1") @@ -6557,9 +6124,10 @@ (define_insn "ssse3_psign<mode>3" [(set (match_operand:SSEMODE124 0 "register_operand" "=x") - (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0") - (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] - UNSPEC_PSIGN))] + (unspec:SSEMODE124 + [(match_operand:SSEMODE124 1 "register_operand" "0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] + UNSPEC_PSIGN))] "TARGET_SSSE3" "psign<ssevecsize>\t{%2, %0|%0, %2}"; [(set_attr "type" "sselog1") @@ -6569,9 +6137,10 @@ (define_insn "ssse3_psign<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] - UNSPEC_PSIGN))] + (unspec:MMXMODEI + [(match_operand:MMXMODEI 1 "register_operand" "0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSIGN))] "TARGET_SSSE3" "psign<mmxvecsize>\t{%2, %0|%0, %2}"; [(set_attr "type" "sselog1") @@ -6583,7 +6152,7 @@ (unspec:TI [(match_operand:TI 1 "register_operand" "0") (match_operand:TI 2 "nonimmediate_operand" "xm") (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] - UNSPEC_PALIGNR))] + UNSPEC_PALIGNR))] "TARGET_SSSE3" { operands[3] = GEN_INT (INTVAL (operands[3]) / 8); @@ -6599,7 +6168,7 @@ (unspec:DI [(match_operand:DI 1 "register_operand" "0") (match_operand:DI 2 "nonimmediate_operand" "ym") (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] - UNSPEC_PALIGNR))] + UNSPEC_PALIGNR))] "TARGET_SSSE3" { operands[3] = GEN_INT (INTVAL (operands[3]) / 8); @@ -6634,45 +6203,27 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "sse4a_vmmovntv2df" - [(set (match_operand:DF 0 "memory_operand" "=m") - (unspec:DF [(vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))] - UNSPEC_MOVNT))] - "TARGET_SSE4A" - "movntsd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "DF")]) - -(define_insn "sse4a_movntdf" - [(set (match_operand:DF 0 "memory_operand" "=m") - (unspec:DF [(match_operand:DF 1 "register_operand" "x")] - UNSPEC_MOVNT))] +(define_insn "sse4a_movnt<mode>" + [(set (match_operand:MODEF 0 "memory_operand" "=m") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "x")] + UNSPEC_MOVNT))] "TARGET_SSE4A" - "movntsd\t{%1, %0|%0, %1}" + "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "mode" "DF")]) - -(define_insn "sse4a_vmmovntv4sf" - [(set (match_operand:SF 0 "memory_operand" "=m") - (unspec:SF [(vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))] - UNSPEC_MOVNT))] - "TARGET_SSE4A" - "movntss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse4a_movntsf" - [(set (match_operand:SF 0 "memory_operand" "=m") - (unspec:SF [(match_operand:SF 1 "register_operand" "x")] - UNSPEC_MOVNT))] +(define_insn "sse4a_vmmovnt<mode>" + [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m") + (unspec:<ssescalarmode> + [(vec_select:<ssescalarmode> + (match_operand:SSEMODEF2P 1 "register_operand" "x") + (parallel [(const_int 0)]))] + UNSPEC_MOVNT))] "TARGET_SSE4A" - "movntss\t{%1, %0|%0, %1}" + "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) + (set_attr "mode" "<ssescalarmode>")]) (define_insn "sse4a_extrqi" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -6727,77 +6278,43 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "sse4_1_blendpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (match_operand:V2DF 1 "register_operand" "0") +(define_insn "sse4_1_blendp<ssemodesuffixf2c>" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") + (match_operand:SSEMODEF2P 1 "register_operand" "0") (match_operand:SI 3 "const_0_to_3_operand" "n")))] "TARGET_SSE4_1" - "blendpd\t{%3, %2, %0|%0, %2, %3}" + "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "mode" "V2DF")]) - -(define_insn "sse4_1_blendps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:SI 3 "const_0_to_15_operand" "n")))] - "TARGET_SSE4_1" - "blendps\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) - -(define_insn "sse4_1_blendvpd" - [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0") - (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm") - (match_operand:V2DF 3 "register_operand" "Yz")] - UNSPEC_BLENDV))] - "TARGET_SSE4_1" - "blendvpd\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse4_1_blendvps" - [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0") - (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm") - (match_operand:V4SF 3 "register_operand" "Yz")] - UNSPEC_BLENDV))] +(define_insn "sse4_1_blendvp<ssemodesuffixf2c>" + [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0") + (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm") + (match_operand:SSEMODEF2P 3 "register_operand" "Yz")] + UNSPEC_BLENDV))] "TARGET_SSE4_1" - "blendvps\t{%3, %2, %0|%0, %2, %3}" + "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) - -(define_insn "sse4_1_dppd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] - UNSPEC_DP))] - "TARGET_SSE4_1" - "dppd\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssemul") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse4_1_dpps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_operand" "n")] - UNSPEC_DP))] +(define_insn "sse4_1_dpp<ssemodesuffixf2c>" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_DP))] "TARGET_SSE4_1" - "dpps\t{%3, %2, %0|%0, %2, %3}" + "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemul") (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) (define_insn "sse4_1_movntdqa" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -7245,55 +6762,32 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) -(define_insn "sse4_1_roundpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "const_0_to_15_operand" "n")] - UNSPEC_ROUND))] - "TARGET_ROUND" - "roundpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V2DF")]) - -(define_insn "sse4_1_roundps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "const_0_to_15_operand" "n")] - UNSPEC_ROUND))] - "TARGET_ROUND" - "roundps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) - -(define_insn "sse4_1_roundsd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x") - (match_operand:SI 3 "const_0_to_15_operand" "n")] - UNSPEC_ROUND) - (match_operand:V2DF 1 "register_operand" "0") - (const_int 1)))] +(define_insn "sse4_1_roundp<ssemodesuffixf2c>" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] "TARGET_ROUND" - "roundsd\t{%3, %2, %0|%0, %2, %3}" + "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecvt") (set_attr "prefix_extra" "1") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "sse4_1_roundss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") - (match_operand:SI 3 "const_0_to_15_operand" "n")] - UNSPEC_ROUND) - (match_operand:V4SF 1 "register_operand" "0") +(define_insn "sse4_1_rounds<ssemodesuffixf2c>" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 2 "register_operand" "x") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_ROUND) + (match_operand:SSEMODEF2P 1 "register_operand" "0") (const_int 1)))] "TARGET_ROUND" - "roundss\t{%3, %2, %0|%0, %2, %3}" + "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssecvt") (set_attr "prefix_extra" "1") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "<MODE>")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -8347,10 +7841,11 @@ ;; SSE5 permute instructions (define_insn "sse5_pperm" [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") - (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm") - (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x") - (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] - UNSPEC_SSE5_PERMUTE))] + (unspec:V16QI + [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm") + (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x") + (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] + UNSPEC_SSE5_PERMUTE))] "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") @@ -8553,7 +8048,7 @@ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] UNSPEC_FRCZ))] "TARGET_SSE5" - "frcz<ssesuffixf4>\t{%1, %0|%0, %1}" + "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt1") (set_attr "prefix_extra" "1") (set_attr "mode" "<MODE>")]) @@ -8567,8 +8062,8 @@ UNSPEC_FRCZ) (match_operand:SSEMODEF2P 1 "register_operand" "0") (const_int 1)))] - "TARGET_ROUND" - "frcz<ssesuffixf2s>\t{%2, %0|%0, %2}" + "TARGET_SSE5" + "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt1") (set_attr "prefix_extra" "1") (set_attr "mode" "<MODE>")]) @@ -8710,10 +8205,11 @@ ;; being added here to be complete. (define_insn "sse5_pcom_tf<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x") - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_int_operand" "n")] - UNSPEC_SSE5_TRUEFALSE))] + (unspec:SSEMODE1248 + [(match_operand:SSEMODE1248 1 "register_operand" "x") + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_int_operand" "n")] + UNSPEC_SSE5_TRUEFALSE))] "TARGET_SSE5" { return ((INTVAL (operands[3]) != 0) diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index a6ddb8c967f..b5c66622c2a 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -4312,9 +4312,9 @@ [(unspec_volatile [(reg:DI 28)] UNSPEC_BLOCKAGE)] "" "" - [(set_attr "type" "unknown") - (set_attr "mode" "none") - (set_attr "length" "0")]) + [(set_attr "type" "ghost") + (set_attr "mode" "none") + (set_attr "length" "0")]) ;; Initialize $gp for RTP PIC. Operand 0 is the __GOTT_BASE__ symbol ;; and operand 1 is the __GOTT_INDEX__ symbol. @@ -5517,9 +5517,9 @@ [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)] "" "" - [(set_attr "type" "unknown") - (set_attr "mode" "none") - (set_attr "length" "0")]) + [(set_attr "type" "ghost") + (set_attr "mode" "none") + (set_attr "length" "0")]) (define_expand "epilogue" [(const_int 2)] diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h index 6c88757d24e..f6c9ee3c2e2 100644 --- a/gcc/config/rs6000/darwin.h +++ b/gcc/config/rs6000/darwin.h @@ -432,3 +432,6 @@ /* When generating kernel code or kexts, we don't use Altivec by default, as kernel code doesn't save/restore those registers. */ #define OS_MISSING_ALTIVEC (flag_mkernel || flag_apple_kext) + +/* Darwin has to rename some of the long double builtins. */ +#define SUBTARGET_INIT_BUILTINS darwin_patch_builtins () diff --git a/gcc/config/rs6000/eabispe.h b/gcc/config/rs6000/eabispe.h index 0acc17c7022..c3a3f2b4e53 100644 --- a/gcc/config/rs6000/eabispe.h +++ b/gcc/config/rs6000/eabispe.h @@ -31,7 +31,7 @@ #define SUBSUBTARGET_OVERRIDE_OPTIONS \ if (rs6000_select[1].string == NULL) \ rs6000_cpu = PROCESSOR_PPC8540; \ - if (!rs6000_explicit_options.abi) \ + if (!rs6000_explicit_options.spe_abi) \ rs6000_spe_abi = 1; \ if (!rs6000_explicit_options.float_gprs) \ rs6000_float_gprs = 1; \ diff --git a/gcc/config/rs6000/linuxspe.h b/gcc/config/rs6000/linuxspe.h index 1aea7442c54..c526cf8dce4 100644 --- a/gcc/config/rs6000/linuxspe.h +++ b/gcc/config/rs6000/linuxspe.h @@ -30,7 +30,7 @@ #define SUBSUBTARGET_OVERRIDE_OPTIONS \ if (rs6000_select[1].string == NULL) \ rs6000_cpu = PROCESSOR_PPC8540; \ - if (!rs6000_explicit_options.abi) \ + if (!rs6000_explicit_options.spe_abi) \ rs6000_spe_abi = 1; \ if (!rs6000_explicit_options.float_gprs) \ rs6000_float_gprs = 1; \ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 4ebea38388b..9b9fefda2af 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -171,7 +171,7 @@ int rs6000_long_double_type_size; /* IEEE quad extended precision long double. */ int rs6000_ieeequad; -/* Whether -mabi=altivec has appeared. */ +/* Nonzero to use AltiVec ABI. */ int rs6000_altivec_abi; /* Nonzero if we want SPE ABI extensions. */ @@ -262,12 +262,14 @@ int rs6000_alignment_flags; struct { bool aix_struct_ret; /* True if -maix-struct-ret was used. */ bool alignment; /* True if -malign- was used. */ - bool abi; /* True if -mabi=spe/nospe was used. */ + bool spe_abi; /* True if -mabi=spe/no-spe was used. */ + bool altivec_abi; /* True if -mabi=altivec/no-altivec used. */ bool spe; /* True if -mspe= was used. */ bool float_gprs; /* True if -mfloat-gprs= was used. */ bool isel; /* True if -misel was used. */ bool long_double; /* True if -mlong-double- was used. */ bool ieee; /* True if -mabi=ieee/ibmlongdouble used. */ + bool vrsave; /* True if -mvrsave was used. */ } rs6000_explicit_options; struct builtin_description @@ -667,6 +669,25 @@ struct processor_costs ppc8540_cost = { 1, /* prefetch streams /*/ }; +/* Instruction costs on E300C2 and E300C3 cores. */ +static const +struct processor_costs ppce300c2c3_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (19), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* prefetch streams /*/ +}; + /* Instruction costs on POWER4 and POWER5 processors. */ static const struct processor_costs power4_cost = { @@ -1418,6 +1439,8 @@ rs6000_override_options (const char *default_cpu) {"8540", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN}, /* 8548 has a dummy entry for now. */ {"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN}, + {"e300c2", PROCESSOR_PPCE300C2, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, + {"e300c3", PROCESSOR_PPCE300C3, POWERPC_BASE_MASK}, {"860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, {"970", PROCESSOR_POWER4, POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64}, @@ -1524,6 +1547,14 @@ rs6000_override_options (const char *default_cpu) if (TARGET_E500) rs6000_isel = 1; + if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3) + { + if (TARGET_ALTIVEC) + error ("AltiVec not supported in this target"); + if (TARGET_SPE) + error ("Spe not supported in this target"); + } + /* If we are optimizing big endian systems for space, use the load/store multiple and string instructions. */ if (BYTES_BIG_ENDIAN && optimize_size) @@ -1590,11 +1621,18 @@ rs6000_override_options (const char *default_cpu) if (TARGET_XCOFF && TARGET_ALTIVEC) rs6000_altivec_abi = 1; - /* Set Altivec ABI as default for PowerPC64 Linux. */ - if (TARGET_ELF && TARGET_64BIT) + /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For + PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can + be explicitly overridden in either case. */ + if (TARGET_ELF) { - rs6000_altivec_abi = 1; - TARGET_ALTIVEC_VRSAVE = 1; + if (!rs6000_explicit_options.altivec_abi + && (TARGET_64BIT || TARGET_ALTIVEC)) + rs6000_altivec_abi = 1; + + /* Enable VRSAVE for AltiVec ABI, unless explicitly overridden. */ + if (!rs6000_explicit_options.vrsave) + TARGET_ALTIVEC_VRSAVE = rs6000_altivec_abi; } /* Set the Darwin64 ABI as default for 64-bit Darwin. */ @@ -1638,7 +1676,7 @@ rs6000_override_options (const char *default_cpu) /* For the powerpc-eabispe configuration, we set all these by default, so let's unset them if we manually set another CPU that is not the E500. */ - if (!rs6000_explicit_options.abi) + if (!rs6000_explicit_options.spe_abi) rs6000_spe_abi = 0; if (!rs6000_explicit_options.spe) rs6000_spe = 0; @@ -1836,6 +1874,11 @@ rs6000_override_options (const char *default_cpu) rs6000_cost = &ppc8540_cost; break; + case PROCESSOR_PPCE300C2: + case PROCESSOR_PPCE300C3: + rs6000_cost = &ppce300c2c3_cost; + break; + case PROCESSOR_POWER4: case PROCESSOR_POWER5: rs6000_cost = &power4_cost; @@ -2131,6 +2174,7 @@ rs6000_handle_option (size_t code, const char *arg, int value) break; case OPT_mvrsave_: + rs6000_explicit_options.vrsave = true; rs6000_parse_yes_no_option ("vrsave", arg, &(TARGET_ALTIVEC_VRSAVE)); break; @@ -2188,19 +2232,20 @@ rs6000_handle_option (size_t code, const char *arg, int value) case OPT_mabi_: if (!strcmp (arg, "altivec")) { - rs6000_explicit_options.abi = true; + rs6000_explicit_options.altivec_abi = true; rs6000_altivec_abi = 1; + + /* Enabling the AltiVec ABI turns off the SPE ABI. */ rs6000_spe_abi = 0; } else if (! strcmp (arg, "no-altivec")) { - /* ??? Don't set rs6000_explicit_options.abi here, to allow - the default for rs6000_spe_abi to be chosen later. */ + rs6000_explicit_options.altivec_abi = true; rs6000_altivec_abi = 0; } else if (! strcmp (arg, "spe")) { - rs6000_explicit_options.abi = true; + rs6000_explicit_options.spe_abi = true; rs6000_spe_abi = 1; rs6000_altivec_abi = 0; if (!TARGET_SPE_ABI) @@ -2208,7 +2253,7 @@ rs6000_handle_option (size_t code, const char *arg, int value) } else if (! strcmp (arg, "no-spe")) { - rs6000_explicit_options.abi = true; + rs6000_explicit_options.spe_abi = true; rs6000_spe_abi = 0; } @@ -3619,19 +3664,29 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, /* We accept [reg + reg] and [reg + OFFSET]. */ if (GET_CODE (x) == PLUS) - { - rtx op1 = XEXP (x, 0); - rtx op2 = XEXP (x, 1); + { + rtx op1 = XEXP (x, 0); + rtx op2 = XEXP (x, 1); + rtx y; - op1 = force_reg (Pmode, op1); + op1 = force_reg (Pmode, op1); - if (GET_CODE (op2) != REG - && (GET_CODE (op2) != CONST_INT - || !SPE_CONST_OFFSET_OK (INTVAL (op2)))) - op2 = force_reg (Pmode, op2); + if (GET_CODE (op2) != REG + && (GET_CODE (op2) != CONST_INT + || !SPE_CONST_OFFSET_OK (INTVAL (op2)) + || (GET_MODE_SIZE (mode) > 8 + && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8)))) + op2 = force_reg (Pmode, op2); - return gen_rtx_PLUS (Pmode, op1, op2); - } + /* We can't always do [reg + reg] for these, because [reg + + reg + offset] is not a legitimate addressing mode. */ + y = gen_rtx_PLUS (Pmode, op1, op2); + + if (GET_MODE_SIZE (mode) > 8 && REG_P (op2)) + return force_reg (Pmode, y); + else + return y; + } return force_reg (Pmode, x); } @@ -9166,6 +9221,10 @@ rs6000_init_builtins (void) if (built_in_decls [BUILT_IN_CLOG]) set_user_assembler_name (built_in_decls [BUILT_IN_CLOG], "__clog"); #endif + +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif } /* Search through a set of builtins and enable the mask bits. @@ -18502,6 +18561,8 @@ rs6000_issue_rate (void) case CPU_PPC7400: case CPU_PPC8540: case CPU_CELL: + case CPU_PPCE300C2: + case CPU_PPCE300C3: return 2; case CPU_RIOS2: case CPU_PPC604: diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 6a64eae3dd9..7f7dd57e1e1 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -60,6 +60,18 @@ #define TARGET_PAIRED_FLOAT 0 #endif +#ifdef HAVE_AS_POPCNTB +#define ASM_CPU_POWER5_SPEC "-mpower5" +#else +#define ASM_CPU_POWER5_SPEC "-mpower4" +#endif + +#ifdef HAVE_AS_DFP +#define ASM_CPU_POWER6_SPEC "-mpower6 -maltivec" +#else +#define ASM_CPU_POWER6_SPEC "-mpower4 -maltivec" +#endif + /* Common ASM definitions used by ASM_SPEC among the various targets for handling -mcpu=xxx switches. */ #define ASM_CPU_SPEC \ @@ -76,10 +88,10 @@ %{mcpu=power2: -mpwrx} \ %{mcpu=power3: -mppc64} \ %{mcpu=power4: -mpower4} \ -%{mcpu=power5: -mpower4} \ -%{mcpu=power5+: -mpower4} \ -%{mcpu=power6: -mpower4 -maltivec} \ -%{mcpu=power6x: -mpower4 -maltivec} \ +%{mcpu=power5: %(asm_cpu_power5)} \ +%{mcpu=power5+: %(asm_cpu_power5)} \ +%{mcpu=power6: %(asm_cpu_power6) -maltivec} \ +%{mcpu=power6x: %(asm_cpu_power6) -maltivec} \ %{mcpu=powerpc: -mppc} \ %{mcpu=rios: -mpwr} \ %{mcpu=rios1: -mpwr} \ @@ -117,6 +129,8 @@ %{mcpu=G5: -mpower4 -maltivec} \ %{mcpu=8540: -me500} \ %{mcpu=8548: -me500} \ +%{mcpu=e300c2: -me300} \ +%{mcpu=e300c3: -me300} \ %{maltivec: -maltivec} \ -many" @@ -141,6 +155,8 @@ { "asm_cpu", ASM_CPU_SPEC }, \ { "asm_default", ASM_DEFAULT_SPEC }, \ { "cc1_cpu", CC1_CPU_SPEC }, \ + { "asm_cpu_power5", ASM_CPU_POWER5_SPEC }, \ + { "asm_cpu_power6", ASM_CPU_POWER6_SPEC }, \ SUBTARGET_EXTRA_SPECS /* -mcpu=native handling only makes sense with compiler running on @@ -262,6 +278,8 @@ enum processor_type PROCESSOR_PPC7400, PROCESSOR_PPC7450, PROCESSOR_PPC8540, + PROCESSOR_PPCE300C2, + PROCESSOR_PPCE300C3, PROCESSOR_POWER4, PROCESSOR_POWER5, PROCESSOR_POWER6, @@ -596,6 +614,7 @@ extern enum rs6000_nop_insertion rs6000_sched_insert_nops; Make vector constants quadword aligned. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) \ (TREE_CODE (EXP) == STRING_CST \ + && (STRICT_ALIGNMENT || !optimize_size) \ && (ALIGN) < BITS_PER_WORD \ ? BITS_PER_WORD \ : (ALIGN)) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d1b43dc78af..777a1ecf46d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -133,7 +133,7 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in rs6000.h. -(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6,cell" +(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,power4,power5,power6,cell" (const (symbol_ref "rs6000_cpu_attr"))) @@ -166,6 +166,7 @@ (include "7xx.md") (include "7450.md") (include "8540.md") +(include "e300c2c3.md") (include "power4.md") (include "power5.md") (include "power6.md") diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 4088ef73cdf..dccbc1ea350 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -3838,6 +3838,7 @@ find_barrier (int num_mova, rtx mova, rtx from) rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0; int si_limit; int hi_limit; + rtx orig = from; /* For HImode: range is 510, add 4 because pc counts from address of second instruction after this one, subtract 2 for the jump instruction @@ -3897,6 +3898,7 @@ find_barrier (int num_mova, rtx mova, rtx from) if (GET_CODE (from) == BARRIER) { + rtx next; found_barrier = from; @@ -3905,6 +3907,14 @@ find_barrier (int num_mova, rtx mova, rtx from) this kind of barrier. */ if (barrier_align (from) > 2) good_barrier = from; + + /* If we are at the end of a hot/cold block, dump the constants + here. */ + next = NEXT_INSN (from); + if (next + && NOTE_P (next) + && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS) + break; } if (broken_move (from)) @@ -4061,7 +4071,8 @@ find_barrier (int num_mova, rtx mova, rtx from) /* If we exceeded the range, then we must back up over the last instruction we looked at. Otherwise, we just need to undo the NEXT_INSN at the end of the loop. */ - if (count_hi > hi_limit || count_si > si_limit) + if (PREV_INSN (from) != orig + && (count_hi > hi_limit || count_si > si_limit)) from = PREV_INSN (PREV_INSN (from)); else from = PREV_INSN (from); diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index d7e2727adac..5a580b517cd 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -7378,7 +7378,7 @@ label: (define_insn "jump_compact" [(set (pc) (label_ref (match_operand 0 "" "")))] - "TARGET_SH1" + "TARGET_SH1 && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" "* { /* The length is 16 if the delay slot is unfilled. */ diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md index f1bfdaff607..14854606b8d 100644 --- a/gcc/config/spu/spu.md +++ b/gcc/config/spu/spu.md @@ -4455,3 +4455,179 @@ selb\t%0,%4,%0,%3" DONE; }") + +(define_expand "vec_unpacku_hi_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] + "" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, + 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacku_lo_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, + 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacks_hi_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] + "" +{ + rtx tmp1 = gen_reg_rtx (V8HImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, + 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xshw (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + + DONE; +}) + +(define_expand "vec_unpacks_lo_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V8HImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, + 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xshw (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + +DONE; +}) + +(define_expand "vec_unpacku_hi_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] + "" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, + 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacku_lo_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] +"" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, + 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacks_hi_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V16QImode); + rtx tmp2 = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, + 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xsbh (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + + DONE; +}) + +(define_expand "vec_unpacks_lo_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V16QImode); + rtx tmp2 = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, + 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xsbh (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + +DONE; +}) + + + + |