summaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2008-02-26 13:09:58 +0000
committerbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2008-02-26 13:09:58 +0000
commitb8053af55de78a3f080783e5113fd6452e5a43c5 (patch)
treea5906142e844e296abb7382e34657faf4e58f74f /gcc/config
parent4896274c9597b09d4c61bdd2efb3201a72634b3c (diff)
downloadgcc-b8053af55de78a3f080783e5113fd6452e5a43c5.tar.gz
2008-02-26 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk r132671 Merged revisions 132452-132671 via svnmerge from svn+ssh://bstarynk@gcc.gnu.org/svn/gcc/trunk git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@132672 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/avr/libgcc.S50
-rw-r--r--gcc/config/cris/cris.h1
-rw-r--r--gcc/config/darwin-protos.h1
-rw-r--r--gcc/config/darwin.c48
-rw-r--r--gcc/config/h8300/h8300.c2
-rw-r--r--gcc/config/h8300/h8300.md3
-rw-r--r--gcc/config/i386/i386.c2
-rw-r--r--gcc/config/i386/i386.h3
-rw-r--r--gcc/config/i386/i386.md176
-rw-r--r--gcc/config/i386/mmx.md16
-rw-r--r--gcc/config/i386/netware-libgcc.exp9
-rw-r--r--gcc/config/i386/netware.c191
-rw-r--r--gcc/config/i386/netware.h24
-rw-r--r--gcc/config/i386/sse.md2722
-rw-r--r--gcc/config/mips/mips.md12
-rw-r--r--gcc/config/rs6000/darwin.h3
-rw-r--r--gcc/config/rs6000/eabispe.h2
-rw-r--r--gcc/config/rs6000/linuxspe.h2
-rw-r--r--gcc/config/rs6000/rs6000.c105
-rw-r--r--gcc/config/rs6000/rs6000.h27
-rw-r--r--gcc/config/rs6000/rs6000.md3
-rw-r--r--gcc/config/sh/sh.c13
-rw-r--r--gcc/config/sh/sh.md2
-rw-r--r--gcc/config/spu/spu.md176
24 files changed, 1736 insertions, 1857 deletions
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S
index 397778b82d9..8fdba55f775 100644
--- a/gcc/config/avr/libgcc.S
+++ b/gcc/config/avr/libgcc.S
@@ -32,6 +32,7 @@ Boston, MA 02110-1301, USA. */
#define __SREG__ 0x3f
#define __SP_H__ 0x3e
#define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
/* Most of the functions here are called directly from avr.md
patterns, instead of using the standard libcall mechanisms.
@@ -686,20 +687,54 @@ __tablejump__:
.endfunc
#endif /* defined (L_tablejump) */
-/* __do_copy_data is only necessary if there is anything in .data section.
- Does not use RAMPZ - crt*.o provides a replacement for >64K devices. */
-
#ifdef L_copy_data
.section .init4,"ax",@progbits
.global __do_copy_data
__do_copy_data:
+#if defined(__AVR_HAVE_ELPMX__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start)
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm r0, Z+
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start - 0x10000)
+.L__do_copy_data_carry:
+ inc r16
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm
+ st X+, r0
+ adiw r30, 1
+ brcs .L__do_copy_data_carry
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
ldi r17, hi8(__data_end)
ldi r26, lo8(__data_start)
ldi r27, hi8(__data_start)
ldi r30, lo8(__data_load_start)
ldi r31, hi8(__data_load_start)
- rjmp .do_copy_data_start
-.do_copy_data_loop:
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
lpm r0, Z+
#else
@@ -707,10 +742,11 @@ __do_copy_data:
adiw r30, 1
#endif
st X+, r0
-.do_copy_data_start:
+.L__do_copy_data_start:
cpi r26, lo8(__data_end)
cpc r27, r17
- brne .do_copy_data_loop
+ brne .L__do_copy_data_loop
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
#endif /* L_copy_data */
/* __do_clear_bss is only necessary if there is anything in .bss section. */
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 9e59b88216b..b8f4ae56713 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -600,6 +600,7 @@ enum reg_class
#define REG_CLASS_FROM_LETTER(C) \
( \
(C) == 'a' ? ACR_REGS : \
+ (C) == 'b' ? GENNONACR_REGS : \
(C) == 'h' ? MOF_REGS : \
(C) == 'x' ? SPECIAL_REGS : \
(C) == 'c' ? CC0_REGS : \
diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
index a8ce17c0111..c894bf05104 100644
--- a/gcc/config/darwin-protos.h
+++ b/gcc/config/darwin-protos.h
@@ -89,3 +89,4 @@ extern void darwin_cpp_builtins (struct cpp_reader *);
extern void darwin_asm_output_anchor (rtx symbol);
extern bool darwin_kextabi_p (void);
extern void darwin_override_options (void);
+extern void darwin_patch_builtins (void);
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index 307698d095f..13aa021f4c6 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -1735,4 +1735,52 @@ darwin_override_options (void)
flag_var_tracking_uninit = 1;
}
+/* Add $LDBL128 suffix to long double builtins. */
+
+static void
+darwin_patch_builtin (int fncode)
+{
+ tree fn = built_in_decls[fncode];
+ tree sym;
+ char *newname;
+
+ if (!fn)
+ return;
+
+ sym = DECL_ASSEMBLER_NAME (fn);
+ newname = alloca (IDENTIFIER_LENGTH (sym) + 10);
+ strcpy (newname, "_");
+ strcat (newname, IDENTIFIER_POINTER (sym));
+ strcat (newname, "$LDBL128");
+ set_user_assembler_name (fn, newname);
+ /*sym = get_identifier (newname);
+ SET_DECL_ASSEMBLER_NAME (fn, sym);*/
+
+ fn = implicit_built_in_decls[fncode];
+ if (fn)
+ set_user_assembler_name (fn, newname);
+ /*SET_DECL_ASSEMBLER_NAME (fn, sym);*/
+}
+
+void
+darwin_patch_builtins (void)
+{
+ if (LONG_DOUBLE_TYPE_SIZE != 128)
+ return;
+
+#define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode);
+#define PATCH_BUILTIN_NO64(fncode) \
+ if (!TARGET_64BIT) \
+ darwin_patch_builtin (fncode);
+#define PATCH_BUILTIN_VARIADIC(fncode) \
+ if (!TARGET_64BIT \
+ && (strverscmp (darwin_macosx_version_min, "10.3.9") >= 0)) \
+ darwin_patch_builtin (fncode);
+#include "darwin-ppc-ldouble-patch.def"
+#undef PATCH_BUILTIN
+#undef PATCH_BUILTIN_NO64
+#undef PATCH_BUILTIN_VARIADIC
+}
+
+
#include "gt-darwin.h"
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index 96b6311ce7c..f90bd414735 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -930,7 +930,7 @@ h8300_expand_epilogue (void)
}
if (!returned_p)
- emit_insn (gen_rtx_RETURN (VOIDmode));
+ emit_jump_insn (gen_rtx_RETURN (VOIDmode));
}
/* Return nonzero if the current function is an interrupt
diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md
index 08a8d2e9313..9b6c0aa4e16 100644
--- a/gcc/config/h8300/h8300.md
+++ b/gcc/config/h8300/h8300.md
@@ -3282,6 +3282,9 @@
if (GET_CODE (operands[0]) == MEM
|| GET_CODE (operands[3]) == MEM)
FAIL;
+
+ if (GET_CODE (operands[3]) != REG)
+ operands[3] = force_reg (HImode, operands[3]);
}")
(define_insn ""
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5dad2fcf515..5a4456d912b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12148,6 +12148,7 @@ ix86_expand_branch (enum rtx_code code, rtx label)
ix86_expand_branch (code, label);
return;
}
+ break;
case LE: case LEU: case GT: case GTU:
if (lo[1] == constm1_rtx)
{
@@ -12156,6 +12157,7 @@ ix86_expand_branch (enum rtx_code code, rtx label)
ix86_expand_branch (code, label);
return;
}
+ break;
default:
break;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index fedac5643a4..f2429846691 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1454,6 +1454,9 @@ enum reg_class
#define SSE_FLOAT_MODE_P(MODE) \
((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
+#define SSE_VEC_FLOAT_MODE_P(MODE) \
+ ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
+
#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3c9c2cc8c86..92a37280f5f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -13964,25 +13964,15 @@
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
;; it directly.
-(define_insn "*sse_setccsf"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (match_operator:SF 1 "sse_comparison_operator"
- [(match_operand:SF 2 "register_operand" "0")
- (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE && !TARGET_SSE5"
- "cmp%D1ss\t{%3, %0|%0, %3}"
- [(set_attr "type" "ssecmp")
- (set_attr "mode" "SF")])
-
-(define_insn "*sse_setccdf"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (match_operator:DF 1 "sse_comparison_operator"
- [(match_operand:DF 2 "register_operand" "0")
- (match_operand:DF 3 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE2 && !TARGET_SSE5"
- "cmp%D1sd\t{%3, %0|%0, %3}"
+(define_insn "*sse_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "sse_comparison_operator"
+ [(match_operand:MODEF 2 "register_operand" "0")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+ "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp")
- (set_attr "mode" "DF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "*sse5_setcc<mode>"
[(set (match_operand:MODEF 0 "register_operand" "=x")
@@ -19383,7 +19373,7 @@
(match_operand:DI 2 "general_operand" "")
(match_operand:DI 3 "general_operand" "")))]
"TARGET_64BIT"
- "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
(define_insn "x86_movdicc_0_m1_rex64"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -19437,7 +19427,7 @@
(match_operand:SI 2 "general_operand" "")
(match_operand:SI 3 "general_operand" "")))]
""
- "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
;; the register first winds up with `sbbl $0,reg', which is also weird.
@@ -19495,7 +19485,7 @@
(match_operand:HI 2 "general_operand" "")
(match_operand:HI 3 "general_operand" "")))]
"TARGET_HIMODE_MATH"
- "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
(define_insn "*movhicc_noc"
[(set (match_operand:HI 0 "register_operand" "=r,r")
@@ -19517,7 +19507,7 @@
(match_operand:QI 2 "general_operand" "")
(match_operand:QI 3 "general_operand" "")))]
"TARGET_QIMODE_MATH"
- "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
(define_insn_and_split "*movqicc_noc"
[(set (match_operand:QI 0 "register_operand" "=r,r")
@@ -19539,13 +19529,15 @@
[(set_attr "type" "icmov")
(set_attr "mode" "SI")])
-(define_expand "movsfcc"
- [(set (match_operand:SF 0 "register_operand" "")
- (if_then_else:SF (match_operand 1 "comparison_operator" "")
- (match_operand:SF 2 "register_operand" "")
- (match_operand:SF 3 "register_operand" "")))]
- "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
- "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
+(define_expand "mov<mode>cc"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (if_then_else:X87MODEF
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:X87MODEF 2 "register_operand" "")
+ (match_operand:X87MODEF 3 "register_operand" "")))]
+ "(TARGET_80387 && TARGET_CMOVE)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
(define_insn "*movsfcc_1_387"
[(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
@@ -19563,14 +19555,6 @@
[(set_attr "type" "fcmov,fcmov,icmov,icmov")
(set_attr "mode" "SF,SF,SI,SI")])
-(define_expand "movdfcc"
- [(set (match_operand:DF 0 "register_operand" "")
- (if_then_else:DF (match_operand 1 "comparison_operator" "")
- (match_operand:DF 2 "register_operand" "")
- (match_operand:DF 3 "register_operand" "")))]
- "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
- "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-
(define_insn "*movdfcc_1"
[(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
@@ -19623,14 +19607,6 @@
split_di (operands+3, 1, operands+7, operands+8);
split_di (operands, 1, operands+2, operands+3);")
-(define_expand "movxfcc"
- [(set (match_operand:XF 0 "register_operand" "")
- (if_then_else:XF (match_operand 1 "comparison_operator" "")
- (match_operand:XF 2 "register_operand" "")
- (match_operand:XF 3 "register_operand" "")))]
- "TARGET_80387 && TARGET_CMOVE"
- "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-
(define_insn "*movxfcc_1"
[(set (match_operand:XF 0 "register_operand" "=f,f")
(if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
@@ -19663,41 +19639,25 @@
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.
-(define_insn "sminsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
- (match_operand:SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE_MATH"
- "minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn "smaxsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
- (match_operand:SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE_MATH"
- "maxss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn "smindf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
- (match_operand:DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "minsd\t{%2, %0|%0, %2}"
+(define_insn "smin<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (smin:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "smaxdf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
- (match_operand:DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "maxsd\t{%2, %0|%0, %2}"
+(define_insn "smax<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (smax:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
+ (set_attr "mode" "<MODE>")])
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
@@ -19705,45 +19665,27 @@
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
-(define_insn "*ieee_sminsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (unspec:SF [(match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MIN))]
- "TARGET_SSE_MATH"
- "minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn "*ieee_smaxsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (unspec:SF [(match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MAX))]
- "TARGET_SSE_MATH"
- "maxss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn "*ieee_smindf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (unspec:DF [(match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MIN))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "minsd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smin<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*ieee_smaxdf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (unspec:DF [(match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MAX))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "maxsd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smax<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
+ (set_attr "mode" "<MODE>")])
;; Make two stack loads independent:
;; fld aa fld aa
@@ -19778,7 +19720,7 @@
(match_operand:QI 2 "register_operand" "")
(match_operand:QI 3 "const_int_operand" "")]
""
- "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
(define_expand "addhicc"
[(match_operand:HI 0 "register_operand" "")
@@ -19786,7 +19728,7 @@
(match_operand:HI 2 "register_operand" "")
(match_operand:HI 3 "const_int_operand" "")]
""
- "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
(define_expand "addsicc"
[(match_operand:SI 0 "register_operand" "")
@@ -19794,7 +19736,7 @@
(match_operand:SI 2 "register_operand" "")
(match_operand:SI 3 "const_int_operand" "")]
""
- "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
(define_expand "adddicc"
[(match_operand:DI 0 "register_operand" "")
@@ -19802,7 +19744,7 @@
(match_operand:DI 2 "register_operand" "")
(match_operand:DI 3 "const_int_operand" "")]
"TARGET_64BIT"
- "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+ "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
;; Misc patterns (?)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ee819936f6e..3371161f82f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -63,9 +63,9 @@
(define_insn "*mov<mode>_internal_rex64"
[(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
- "=rm,r,*y,*y ,m ,*y,Y2,x,x ,m,r,x")
+ "=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x")
(match_operand:MMXMODEI 1 "vector_move_operand"
- "Cr ,m,C ,*ym,*y,Y2,*y,C,xm,x,x,r"))]
+ "Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))]
"TARGET_64BIT && TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -87,9 +87,9 @@
(define_insn "*mov<mode>_internal"
[(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
- "=*y,*y ,m ,*y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m")
+ "=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m")
(match_operand:MMXMODEI 1 "vector_move_operand"
- "C ,*ym,*y,*Y2,*y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
+ "C ,!ym,!y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
"TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -122,9 +122,9 @@
(define_insn "*movv2sf_internal_rex64"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
- "=rm,r,*y ,*y ,m ,*y,Y2,x,x,x,m,r,x")
+ "=rm,r,!y ,!y ,m ,!y,Y2,x,x,x,m,r,x")
(match_operand:V2SF 1 "vector_move_operand"
- "Cr ,m ,C ,*ym,*y,Y2,*y,C,x,m,x,x,r"))]
+ "Cr ,m ,C ,!ym,!y,Y2,!y,C,x,m,x,x,r"))]
"TARGET_64BIT && TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -147,9 +147,9 @@
(define_insn "*movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
- "=*y,*y ,m,*y ,*Y2,*x,*x,*x,m ,?r ,?m")
+ "=!y,!y ,m,!y ,*Y2,*x,*x,*x,m ,?r ,?m")
(match_operand:V2SF 1 "vector_move_operand"
- "C ,*ym,*y,*Y2,*y ,C ,*x,m ,*x,irm,r"))]
+ "C ,!ym,!y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
"TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
diff --git a/gcc/config/i386/netware-libgcc.exp b/gcc/config/i386/netware-libgcc.exp
index a3498c0e720..309cf754943 100644
--- a/gcc/config/i386/netware-libgcc.exp
+++ b/gcc/config/i386/netware-libgcc.exp
@@ -6,6 +6,8 @@
__addvsi3,
# __ashldi3,
# __ashrdi3,
+ __bswapdi2,
+ __bswapsi2,
__clzdi2,
__clzsi2,
__ctzdi2,
@@ -18,12 +20,18 @@
__divsc3,
# __divtc3,
__divxc3,
+ __emutls_get_address,
+ __emutls_register_common,
__ffsdi2,
__ffssi2,
__fixunsdfdi,
__fixunssfdi,
# __fixunstfdi,
__fixunsxfdi,
+ __floatundisf,
+ __floatundidf,
+# __floatunditf,
+ __floatundixf,
__gcc_bcmp,
__gcc_personality_v0,
# __lshrdi3,
@@ -64,6 +72,7 @@
_Unwind_GetDataRelBase,
_Unwind_GetGR,
_Unwind_GetIP,
+ _Unwind_GetIPInfo,
_Unwind_GetLanguageSpecificData,
_Unwind_GetRegionStart,
_Unwind_GetTextRelBase,
diff --git a/gcc/config/i386/netware.c b/gcc/config/i386/netware.c
index 0357baff169..63c26cc7551 100644
--- a/gcc/config/i386/netware.c
+++ b/gcc/config/i386/netware.c
@@ -1,6 +1,6 @@
/* Subroutines for insn-output.c for NetWare.
Contributed by Jan Beulich (jbeulich@novell.com)
- Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
This file is part of GCC.
@@ -32,28 +32,25 @@ along with GCC; see the file COPYING3. If not see
#include "toplev.h"
#include "ggc.h"
-
-/* Return string which is the former assembler name modified with an
- underscore prefix and a suffix consisting of an atsign (@) followed
- by the number of bytes of arguments */
+/* Return string which is the function name, identified by ID, modified
+ with PREFIX and a suffix consisting of an atsign (@) followed by the
+ number of bytes of arguments. If ID is NULL use the DECL_NAME as base.
+ Return NULL if no change required. */
static tree
-gen_stdcall_or_fastcall_decoration (tree decl, char prefix)
+gen_stdcall_or_fastcall_decoration (tree decl, tree id, char prefix)
{
- unsigned total = 0;
- /* ??? This probably should use XSTR (XEXP (DECL_RTL (decl), 0), 0) instead
- of DECL_ASSEMBLER_NAME. */
- const char *asmname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
- char *newsym;
+ unsigned HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str;
tree type = TREE_TYPE (decl);
- tree arg;
- function_args_iterator args_iter;
if (prototype_p (type))
{
- /* These attributes are ignored for variadic functions in
- i386.c:ix86_return_pops_args. For compatibility with MS
- compiler do not add @0 suffix here. */
+ tree arg;
+ function_args_iterator args_iter;
+
+ /* This attribute is ignored for variadic functions. */
if (stdarg_p (type))
return NULL_TREE;
@@ -61,50 +58,50 @@ gen_stdcall_or_fastcall_decoration (tree decl, char prefix)
by convert_arguments in c-typeck.c or cp/typeck.c. */
FOREACH_FUNCTION_ARGS(type, arg, args_iter)
{
- unsigned parm_size;
+ HOST_WIDE_INT parm_size;
+ unsigned HOST_WIDE_INT parm_boundary_bytes;
if (! COMPLETE_TYPE_P (arg))
break;
- parm_size = int_size_in_bytes (TYPE_SIZE (arg));
+ parm_size = int_size_in_bytes (arg);
if (parm_size < 0)
break;
+ parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
/* Must round up to include padding. This is done the same
way as in store_one_arg. */
- parm_size = ((parm_size + PARM_BOUNDARY - 1)
- / PARM_BOUNDARY * PARM_BOUNDARY);
- total += parm_size;
+ total += (parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes;
}
}
- newsym = alloca (1 + strlen (asmname) + 1 + 10 + 1);
- return get_identifier_with_length (newsym,
- sprintf (newsym,
- "%c%s@%u",
- prefix,
- asmname,
- total / BITS_PER_UNIT));
+ new_str = alloca (1 + strlen (old_str) + 1 + 10 + 1);
+ sprintf (new_str, "%c%s@" HOST_WIDE_INT_PRINT_UNSIGNED,
+ prefix, old_str, total);
+
+ return get_identifier (new_str);
}
-/* Return string which is the former assembler name modified with an
- _n@ prefix where n represents the number of arguments passed in
- registers */
+/* Return string which is the function name, identified by ID, modified
+ with an _n@ prefix (where n represents the number of arguments passed in
+ registers). If ID is NULL use the DECL_NAME as base.
+ Return NULL if no change required. */
static tree
-gen_regparm_prefix (tree decl, unsigned nregs)
+gen_regparm_prefix (tree decl, tree id, unsigned int nregs)
{
- unsigned total = 0;
- /* ??? This probably should use XSTR (XEXP (DECL_RTL (decl), 0), 0) instead
- of DECL_ASSEMBLER_NAME. */
- const char *asmname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
- char *newsym;
+ unsigned HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str;
tree type = TREE_TYPE (decl);
- tree arg;
- function_args_iterator args_iter;
if (prototype_p (type))
{
+ tree arg;
+ function_args_iterator args_iter;
+
/* This attribute is ignored for variadic functions. */
if (stdarg_p (type))
return NULL_TREE;
@@ -113,7 +110,8 @@ gen_regparm_prefix (tree decl, unsigned nregs)
by convert_arguments in c-typeck.c or cp/typeck.c. */
FOREACH_FUNCTION_ARGS(type, arg, args_iter)
{
- unsigned parm_size;
+ HOST_WIDE_INT parm_size;
+ unsigned HOST_WIDE_INT parm_boundary_bytes;
if (! COMPLETE_TYPE_P (arg))
break;
@@ -122,21 +120,58 @@ gen_regparm_prefix (tree decl, unsigned nregs)
if (parm_size < 0)
break;
- parm_size = ((parm_size + PARM_BOUNDARY - 1)
- / PARM_BOUNDARY * PARM_BOUNDARY);
- total += parm_size;
+ parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+ /* Must round up to include padding. This is done the same
+ way as in store_one_arg. */
+ total += (parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes;
}
}
- if (nregs > total / BITS_PER_WORD)
- nregs = total / BITS_PER_WORD;
+ if (nregs > total / UNITS_PER_WORD)
+ nregs = total / UNITS_PER_WORD;
gcc_assert (nregs <= 9);
- newsym = alloca (3 + strlen (asmname) + 1);
- return get_identifier_with_length (newsym,
- sprintf (newsym,
- "_%u@%s",
- nregs,
- asmname));
+ new_str = alloca (3 + strlen (old_str) + 1);
+ sprintf (new_str, "_%u@%s", nregs, old_str);
+
+ return get_identifier (new_str);
+}
+
+/* Maybe decorate and get a new identifier for the DECL of a stdcall or
+ fastcall function. The original identifier is supplied in ID. */
+
+static tree
+i386_nlm_maybe_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ tree new_id;
+
+ if (lookup_attribute ("stdcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_decoration (decl, id, '_');
+ else if (lookup_attribute ("fastcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_decoration (decl, id, FASTCALL_PREFIX);
+ else if ((new_id = lookup_attribute ("regparm", type_attributes)))
+ new_id = gen_regparm_prefix (decl, id,
+ TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (new_id))));
+ else
+ new_id = NULL_TREE;
+
+ return new_id;
+}
+
+/* This is used as a target hook to modify the DECL_ASSEMBLER_NAME
+ in the language-independent default hook
+ langhooks.c:lhd_set_decl_assembler_name ()
+ and in cp/mangle.c:mangle_decl (). */
+tree
+i386_nlm_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree new_id = TREE_CODE (decl) == FUNCTION_DECL
+ ? i386_nlm_maybe_mangle_decl_assembler_name (decl, id)
+ : NULL_TREE;
+
+ return (new_id ? new_id : id);
}
void
@@ -146,31 +181,28 @@ i386_nlm_encode_section_info (tree decl, rtx rtl, int first)
if (first
&& TREE_CODE (decl) == FUNCTION_DECL
+ /* Do not change the identifier if a verbatim asmspec
+ or if stdcall suffix already added. */
&& *IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) != '*'
&& !strchr (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), '@'))
{
- tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
- tree newid;
-
- if (lookup_attribute ("stdcall", type_attributes))
- newid = gen_stdcall_or_fastcall_decoration (decl, '_');
- else if (lookup_attribute ("fastcall", type_attributes))
- newid = gen_stdcall_or_fastcall_decoration (decl, FASTCALL_PREFIX);
- else if ((newid = lookup_attribute ("regparm", type_attributes)) != NULL_TREE)
- newid = gen_regparm_prefix (decl,
- TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (newid))));
- if (newid != NULL_TREE)
- {
- rtx rtlname = XEXP (rtl, 0);
+ /* FIXME: In Ada, and perhaps other language frontends,
+ imported stdcall names may not yet have been modified.
+ Check and do it know. */
+ rtx symbol = XEXP (rtl, 0);
+ tree new_id;
+ tree old_id = DECL_ASSEMBLER_NAME (decl);
+
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
- if (GET_CODE (rtlname) == MEM)
- rtlname = XEXP (rtlname, 0);
- XSTR (rtlname, 0) = IDENTIFIER_POINTER (newid);
+ if ((new_id = i386_nlm_maybe_mangle_decl_assembler_name (decl, old_id)))
+ {
/* These attributes must be present on first declaration,
change_decl_assembler_name will warn if they are added
later and the decl has been referenced, but duplicate_decls
- should catch the mismatch before this is called. */
- change_decl_assembler_name (decl, newid);
+ should catch the mismatch first. */
+ change_decl_assembler_name (decl, new_id);
+ XSTR (symbol, 0) = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
}
}
}
@@ -201,3 +233,24 @@ i386_nlm_strip_name_encoding (const char *str)
}
return name;
}
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+
+void
+netware_override_options (void)
+{
+ override_options ();
+
+ if (flag_pic)
+ {
+ error ("-fPIC and -fpic are not supported for this target");
+ flag_pic = 0;
+ }
+}
diff --git a/gcc/config/i386/netware.h b/gcc/config/i386/netware.h
index e7459a2fe79..d4f31e0bbb2 100644
--- a/gcc/config/i386/netware.h
+++ b/gcc/config/i386/netware.h
@@ -72,6 +72,18 @@ along with GCC; see the file COPYING3. If not see
#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | \
MASK_FLOAT_RETURNS | MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT)
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+#undef OVERRIDE_OPTIONS
+extern void netware_override_options (void);
+#define OVERRIDE_OPTIONS netware_override_options ()
+
#undef MATH_LIBRARY
#define MATH_LIBRARY ""
@@ -142,13 +154,15 @@ along with GCC; see the file COPYING3. If not see
function named by the symbol (such as what section it is in).
On i386 running NetWare, modify the assembler name with an underscore (_)
- prefix and a suffix consisting of an atsign (@) followed by a string of
- digits that represents the number of bytes of arguments passed to the
- function, if it has the attribute STDCALL. Alternatively, if it has the
- REGPARM attribute, prefix it with an underscore (_), a digit representing
- the number of registers used, and an atsign (@). */
+ or atsign (@) prefix and a suffix consisting of an atsign (@) followed by
+ a string of digits that represents the number of bytes of arguments passed
+ to the function, if it has the attribute STDCALL. Alternatively, if it has
+ the REGPARM attribute, prefix it with an underscore (_), a digit
+ representing the number of registers used, and an atsign (@). */
void i386_nlm_encode_section_info (tree, rtx, int);
+extern tree i386_nlm_mangle_decl_assembler_name (tree, tree);
const char *i386_nlm_strip_name_encoding (const char *);
#define SUBTARGET_ENCODE_SECTION_INFO i386_nlm_encode_section_info
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_nlm_mangle_decl_assembler_name
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING i386_nlm_strip_name_encoding
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 43f7ced8c03..97250dbd2ed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -36,6 +36,9 @@
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
+
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
@@ -137,12 +140,12 @@
gcc_unreachable ();
})
-(define_expand "movv4sf"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
- (match_operand:V4SF 1 "nonimmediate_operand" ""))]
+(define_expand "mov<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
- ix86_expand_vector_move (V4SFmode, operands);
+ ix86_expand_vector_move (<MODE>mode, operands);
DONE;
})
@@ -181,15 +184,6 @@
operands[2] = CONST0_RTX (V4SFmode);
})
-(define_expand "movv2df"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
- (match_operand:V2DF 1 "nonimmediate_operand" ""))]
- "TARGET_SSE"
-{
- ix86_expand_vector_move (V2DFmode, operands);
- DONE;
-})
-
(define_insn "*movv2df_internal"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
(match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
@@ -249,23 +243,16 @@
DONE;
})
-(define_insn "sse_movups"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "movups\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_movupd"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "movupd\t{%1, %0|%0, %1}"
+(define_insn "<sse>_movup<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "sse2_movdqu"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
@@ -277,23 +264,15 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
-(define_insn "sse_movntv4sf"
- [(set (match_operand:V4SF 0 "memory_operand" "=m")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
- "TARGET_SSE"
- "movntps\t{%1, %0|%0, %1}"
+(define_insn "<sse>_movnt<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse2_movntv2df"
- [(set (match_operand:V2DF 0 "memory_operand" "=m")
- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
- "TARGET_SSE2"
- "movntpd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "sse2_movntv2di"
[(set (match_operand:V2DI 0 "memory_operand" "=m")
@@ -328,18 +307,20 @@
; that directly map to insns are defined; it would be possible to
; define patterns for other modes that would expand to several insns.
-(define_expand "storentv4sf"
- [(set (match_operand:V4SF 0 "memory_operand" "")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")]
- UNSPEC_MOVNT))]
- "TARGET_SSE"
+(define_expand "storent<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
"")
-(define_expand "storentv2df"
- [(set (match_operand:V2DF 0 "memory_operand" "")
- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "")]
- UNSPEC_MOVNT))]
- "TARGET_SSE2"
+(define_expand "storent<mode>"
+ [(set (match_operand:MODEF 0 "memory_operand" "")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
"")
(define_expand "storentv2di"
@@ -356,121 +337,120 @@
"TARGET_SSE2"
"")
-(define_expand "storentdf"
- [(set (match_operand:DF 0 "memory_operand" "")
- (unspec:DF [(match_operand:DF 1 "register_operand" "")]
- UNSPEC_MOVNT))]
- "TARGET_SSE4A"
- "")
-
-(define_expand "storentsf"
- [(set (match_operand:SF 0 "memory_operand" "")
- (unspec:SF [(match_operand:SF 1 "register_operand" "")]
- UNSPEC_MOVNT))]
- "TARGET_SSE4A"
- "")
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Parallel single-precision floating point arithmetic
+;; Parallel floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "negv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (neg:V4SF (match_operand:V4SF 1 "register_operand" "")))]
- "TARGET_SSE"
- "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
+(define_expand "neg<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (neg:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_expand_fp_absneg_operator (NEG, <MODE>mode, operands); DONE;")
-(define_expand "absv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (abs:V4SF (match_operand:V4SF 1 "register_operand" "")))]
- "TARGET_SSE"
- "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
+(define_expand "abs<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (abs:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;")
-(define_expand "addv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
+(define_expand "add<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (plus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
-(define_insn "*addv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
- "addps\t{%2, %0|%0, %2}"
+(define_insn "*add<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (plus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "addp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_vmaddv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmadd<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
- "addss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+ "adds<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<ssescalarmode>")])
-(define_expand "subv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
+(define_expand "sub<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
-(define_insn "*subv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "subps\t{%2, %0|%0, %2}"
+(define_insn "*sub<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "subp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_vmsubv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmsub<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE"
- "subss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "subs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<ssescalarmode>")])
-(define_expand "mulv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
+(define_expand "mul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*mulv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
- "mulps\t{%2, %0|%0, %2}"
+(define_insn "*mul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemul")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_vmmulv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmmul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
- "mulss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemul")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<ssescalarmode>")])
(define_expand "divv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "")
@@ -490,31 +470,40 @@
}
})
-(define_insn "sse_divv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "divps\t{%2, %0|%0, %2}"
+(define_expand "divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
+
+(define_insn "<sse>_div<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssediv")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_vmdivv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmdiv<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE"
- "divss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssediv")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<ssescalarmode>")])
(define_insn "sse_rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
- [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
@@ -532,6 +521,48 @@
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
+(define_expand "sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
+ DONE;
+ }
+})
+
+(define_insn "sse_sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "sqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "sqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "<sse>_vmsqrt<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "0")
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_expand "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
(unspec:V4SF
@@ -563,126 +594,101 @@
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
-(define_expand "sqrtv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
- "TARGET_SSE"
-{
- if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
- && flag_finite_math_only && !flag_trapping_math
- && flag_unsafe_math_optimizations)
- {
- ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
- DONE;
- }
-})
-
-(define_insn "sse_sqrtv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "sqrtps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_vmsqrtv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
- "TARGET_SSE"
- "sqrtss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "SF")])
-
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
-(define_expand "smaxv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
+(define_expand "smin<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (smin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
{
if (!flag_finite_math_only)
- operands[1] = force_reg (V4SFmode, operands[1]);
- ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
})
-(define_insn "*smaxv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
- "maxps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
+(define_insn "*smin<mode>3_finite"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
+ "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
-(define_insn "*smaxv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "maxps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
+(define_insn "*smin<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_vmsmaxv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmsmin<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE"
- "maxss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "mins<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<ssescalarmode>")])
-(define_expand "sminv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
+(define_expand "smax<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (smax:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
{
if (!flag_finite_math_only)
- operands[1] = force_reg (V4SFmode, operands[1]);
- ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
})
-(define_insn "*sminv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
- "minps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
+(define_insn "*smax<mode>3_finite"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smax:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
+ "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
-(define_insn "*sminv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "minps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
+(define_insn "*smax<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smax:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_vmsminv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE"
- "minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "SF")])
+(define_insn "<sse>_vmsmax<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smax:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "maxs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<ssescalarmode>")])
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
@@ -690,45 +696,27 @@
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
-(define_insn "*ieee_sminv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MIN))]
- "TARGET_SSE"
- "minps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*ieee_smaxv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MAX))]
- "TARGET_SSE"
- "maxps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*ieee_sminv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MIN))]
- "TARGET_SSE2"
- "minpd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smin<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*ieee_smaxv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
- UNSPEC_IEEE_MAX))]
- "TARGET_SSE2"
- "maxpd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smax<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -744,6 +732,19 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
+(define_insn "sse3_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
+ "TARGET_SSE3"
+ "addsubpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
(define_insn "sse3_haddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_concat:V4SF
@@ -771,6 +772,24 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
+(define_insn "sse3_haddv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3"
+ "haddpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
(define_insn "sse3_hsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_concat:V4SF
@@ -798,6 +817,24 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
+(define_insn "sse3_hsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (minus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (minus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3"
+ "hsubpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
(define_expand "reduc_splus_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
@@ -814,6 +851,15 @@
DONE;
})
+(define_expand "reduc_splus_v2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")]
+ "TARGET_SSE3"
+{
+ emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
+ DONE;
+})
+
(define_expand "reduc_smax_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
@@ -834,80 +880,71 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Parallel single-precision floating point comparisons
+;; Parallel floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse_maskcmpv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (match_operator:V4SF 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE && !TARGET_SSE5"
- "cmp%D3ps\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssecmp")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_maskcmpsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (match_operator:SF 3 "sse_comparison_operator"
- [(match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE && !TARGET_SSE5"
- "cmp%D3ss\t{%2, %0|%0, %2}"
+(define_insn "<sse>_maskcmp<mode>3"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
+ (match_operator:SSEMODEF4 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF4 1 "register_operand" "0")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
+ && !TARGET_SSE5"
+ "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_vmmaskcmpv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (match_operator:V4SF 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "register_operand" "x")])
+(define_insn "<sse>_vmmaskcmp<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE && !TARGET_SSE5"
- "cmp%D3ss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+ "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<ssescalarmode>")])
-(define_insn "sse_comi"
+(define_insn "<sse>_comi"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
- (vec_select:SF
- (match_operand:V4SF 0 "register_operand" "x")
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 0 "register_operand" "x")
(parallel [(const_int 0)]))
- (vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))))]
- "TARGET_SSE"
- "comiss\t{%1, %0|%0, %1}"
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecomi")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_ucomi"
+(define_insn "<sse>_ucomi"
[(set (reg:CCFPU FLAGS_REG)
(compare:CCFPU
- (vec_select:SF
- (match_operand:V4SF 0 "register_operand" "x")
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 0 "register_operand" "x")
(parallel [(const_int 0)]))
- (vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))))]
- "TARGET_SSE"
- "ucomiss\t{%1, %0|%0, %1}"
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecomi")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<MODE>")])
-(define_expand "vcondv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (if_then_else:V4SF
+(define_expand "vcond<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (if_then_else:SSEMODEF2P
(match_operator 3 ""
- [(match_operand:V4SF 4 "nonimmediate_operand" "")
- (match_operand:V4SF 5 "nonimmediate_operand" "")])
- (match_operand:V4SF 1 "general_operand" "")
- (match_operand:V4SF 2 "general_operand" "")))]
- "TARGET_SSE"
+ [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODEF2P 1 "general_operand" "")
+ (match_operand:SSEMODEF2P 2 "general_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
{
if (ix86_expand_fp_vcond (operands))
DONE;
@@ -917,666 +954,123 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Parallel single-precision floating point logical operations
+;; Parallel floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "andv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
+(define_expand "and<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (and:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
-(define_insn "*andv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
- "andps\t{%2, %0|%0, %2}"
+(define_insn "*and<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (and:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+ "andp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse_nandv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "andnps\t{%2, %0|%0, %2}"
+(define_insn "<sse>_nand<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (and:SSEMODEF2P
+ (not:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_expand "iorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
+(define_expand "ior<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (ior:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
-(define_insn "*iorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
- "orps\t{%2, %0|%0, %2}"
+(define_insn "*ior<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (ior:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
+ "orp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_expand "xorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
+(define_expand "xor<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (xor:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
-(define_insn "*xorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
- "xorps\t{%2, %0|%0, %2}"
+(define_insn "*xor<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (xor:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+ "xorp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
;; Also define scalar versions. These are used for abs, neg, and
;; conditional move. Using subregs into vector modes causes register
;; allocation lossage. These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
-(define_insn "*andsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (and:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "andps\t{%2, %0|%0, %2}"
+(define_insn "*and<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "andp<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*nandsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "andnps\t{%2, %0|%0, %2}"
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*nand<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (not:MODEF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*iorsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (ior:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "orps\t{%2, %0|%0, %2}"
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*ior<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (ior:MODEF
+ (match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "orp<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*xorsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (xor:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "xorps\t{%2, %0|%0, %2}"
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*xor<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (xor:MODEF
+ (match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "xorp<ssemodefsuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel single-precision floating point conversion operations
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "sse_cvtpi2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
- (match_operand:V4SF 1 "register_operand" "0")
- (const_int 3)))]
- "TARGET_SSE"
- "cvtpi2ps\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssecvt")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_cvtps2pi"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
- UNSPEC_FIX_NOTRUNC)
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "cvtps2pi\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "unit" "mmx")
- (set_attr "mode" "DI")])
-
-(define_insn "sse_cvttps2pi"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "cvttps2pi\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "unit" "mmx")
- (set_attr "mode" "SF")])
-
-(define_insn "sse_cvtsi2ss"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
- (match_operand:V4SF 1 "register_operand" "0,0")
- (const_int 1)))]
- "TARGET_SSE"
- "cvtsi2ss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "vector,double")
- (set_attr "amdfam10_decode" "vector,double")
- (set_attr "mode" "SF")])
-
-(define_insn "sse_cvtsi2ssq"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
- (match_operand:V4SF 1 "register_operand" "0,0")
- (const_int 1)))]
- "TARGET_SSE && TARGET_64BIT"
- "cvtsi2ssq\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "vector,double")
- (set_attr "amdfam10_decode" "vector,double")
- (set_attr "mode" "SF")])
-
-(define_insn "sse_cvtss2si"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (unspec:SI
- [(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
- (parallel [(const_int 0)]))]
- UNSPEC_FIX_NOTRUNC))]
- "TARGET_SSE"
- "cvtss2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "SI")])
-
-(define_insn "sse_cvtss2si_2"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
- UNSPEC_FIX_NOTRUNC))]
- "TARGET_SSE"
- "cvtss2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "amdfam10_decode" "double,double")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "SI")])
-
-(define_insn "sse_cvtss2siq"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (unspec:DI
- [(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
- (parallel [(const_int 0)]))]
- UNSPEC_FIX_NOTRUNC))]
- "TARGET_SSE && TARGET_64BIT"
- "cvtss2siq\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "DI")])
-
-(define_insn "sse_cvtss2siq_2"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
- UNSPEC_FIX_NOTRUNC))]
- "TARGET_SSE && TARGET_64BIT"
- "cvtss2siq\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "amdfam10_decode" "double,double")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "DI")])
-
-(define_insn "sse_cvttss2si"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (fix:SI
- (vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
- (parallel [(const_int 0)]))))]
- "TARGET_SSE"
- "cvttss2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "amdfam10_decode" "double,double")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "SI")])
-
-(define_insn "sse_cvttss2siq"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (fix:DI
- (vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
- (parallel [(const_int 0)]))))]
- "TARGET_SSE && TARGET_64BIT"
- "cvttss2siq\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "amdfam10_decode" "double,double")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "DI")])
-
-(define_insn "sse2_cvtdq2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "cvtdq2ps\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse2_cvtps2dq"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
- UNSPEC_FIX_NOTRUNC))]
- "TARGET_SSE2"
- "cvtps2dq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix_data16" "1")
- (set_attr "mode" "TI")])
-
-(define_insn "sse2_cvttps2dq"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "cvttps2dq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "TI")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel single-precision floating point element swizzling
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "sse_movhlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
- (parallel [(const_int 6)
- (const_int 7)
- (const_int 2)
- (const_int 3)])))]
- "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "@
- movhlps\t{%2, %0|%0, %2}
- movlps\t{%H2, %0|%0, %H2}
- movhps\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF,V2SF,V2SF")])
-
-(define_insn "sse_movlhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 4)
- (const_int 5)])))]
- "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
- "@
- movlhps\t{%2, %0|%0, %2}
- movhps\t{%2, %0|%0, %2}
- movlps\t{%2, %H0|%H0, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF,V2SF,V2SF")])
-
-(define_insn "sse_unpckhps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (parallel [(const_int 2) (const_int 6)
- (const_int 3) (const_int 7)])))]
- "TARGET_SSE"
- "unpckhps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_unpcklps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0) (const_int 4)
- (const_int 1) (const_int 5)])))]
- "TARGET_SSE"
- "unpcklps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-;; These are modeled with the same vec_concat as the others so that we
-;; capture users of shufps that can use the new instructions
-(define_insn "sse3_movshdup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
- (match_dup 1))
- (parallel [(const_int 1)
- (const_int 1)
- (const_int 7)
- (const_int 7)])))]
- "TARGET_SSE3"
- "movshdup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse3_movsldup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
- (match_dup 1))
- (parallel [(const_int 0)
- (const_int 0)
- (const_int 6)
- (const_int 6)])))]
- "TARGET_SSE3"
- "movsldup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "prefix_rep" "1")
- (set_attr "mode" "V4SF")])
-
-(define_expand "sse_shufps"
- [(match_operand:V4SF 0 "register_operand" "")
- (match_operand:V4SF 1 "register_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")
- (match_operand:SI 3 "const_int_operand" "")]
- "TARGET_SSE"
-{
- int mask = INTVAL (operands[3]);
- emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT (((mask >> 4) & 3) + 4),
- GEN_INT (((mask >> 6) & 3) + 4)));
- DONE;
-})
-
-(define_insn "sse_shufps_1"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (parallel [(match_operand 3 "const_0_to_3_operand" "")
- (match_operand 4 "const_0_to_3_operand" "")
- (match_operand 5 "const_4_to_7_operand" "")
- (match_operand 6 "const_4_to_7_operand" "")])))]
- "TARGET_SSE"
-{
- int mask = 0;
- mask |= INTVAL (operands[3]) << 0;
- mask |= INTVAL (operands[4]) << 2;
- mask |= (INTVAL (operands[5]) - 4) << 4;
- mask |= (INTVAL (operands[6]) - 4) << 6;
- operands[3] = GEN_INT (mask);
-
- return "shufps\t{%3, %2, %0|%0, %2, %3}";
-}
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_storehps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_SSE"
- "@
- movhps\t{%1, %0|%0, %1}
- movhlps\t{%1, %0|%0, %1}
- movlps\t{%H1, %0|%0, %H1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2SF,V4SF,V2SF")])
-
-(define_insn "sse_loadhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
- (vec_concat:V4SF
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
- (parallel [(const_int 0) (const_int 1)]))
- (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
- "TARGET_SSE"
- "@
- movhps\t{%2, %0|%0, %2}
- movlhps\t{%2, %0|%0, %2}
- movlps\t{%2, %H0|%H0, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2SF,V4SF,V2SF")])
-
-(define_insn "sse_storelps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "@
- movlps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}
- movlps\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2SF,V4SF,V2SF")])
-
-(define_insn "sse_loadlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
- (vec_concat:V4SF
- (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
- (parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_SSE"
- "@
- shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
- movlps\t{%2, %0|%0, %2}
- movlps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog,ssemov,ssemov")
- (set_attr "mode" "V4SF,V2SF,V2SF")])
-
-(define_insn "sse_movss"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (match_operand:V4SF 2 "register_operand" "x")
- (match_operand:V4SF 1 "register_operand" "0")
- (const_int 1)))]
- "TARGET_SSE"
- "movss\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "SF")])
-
-(define_insn "*vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "register_operand" "0")))]
- "TARGET_SSE"
- "shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "type" "sselog1")
- (set_attr "mode" "V4SF")])
-
-;; ??? In theory we can match memory for the MMX alternative, but allowing
-;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
-;; alternatives pretty much forces the MMX alternative to be chosen.
-(define_insn "*sse_concatv2sf"
- [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
- (vec_concat:V2SF
- (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
- (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
- "TARGET_SSE"
- "@
- unpcklps\t{%2, %0|%0, %2}
- movss\t{%1, %0|%0, %1}
- punpckldq\t{%2, %0|%0, %2}
- movd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
- (set_attr "mode" "V4SF,SF,DI,DI")])
-
-(define_insn "*sse_concatv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (vec_concat:V4SF
- (match_operand:V2SF 1 "register_operand" " 0,0")
- (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
- "TARGET_SSE"
- "@
- movlhps\t{%2, %0|%0, %2}
- movhps\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF,V2SF")])
-
-(define_expand "vec_initv4sf"
- [(match_operand:V4SF 0 "register_operand" "")
- (match_operand 1 "" "")]
- "TARGET_SSE"
-{
- ix86_expand_vector_init (false, operands[0], operands[1]);
- DONE;
-})
-
-(define_insn "vec_setv4sf_0"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
- (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
- (const_int 1)))]
- "TARGET_SSE"
- "@
- movss\t{%2, %0|%0, %2}
- movss\t{%2, %0|%0, %2}
- movd\t{%2, %0|%0, %2}
- #"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "SF")])
-
-;; A subset is vec_setv4sf.
-(define_insn "*vec_setv4sf_sse4_1"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (match_operand:SF 2 "nonimmediate_operand" "xm"))
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
- "TARGET_SSE4_1"
-{
- operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
- return "insertps\t{%3, %2, %0|%0, %2, %3}";
-}
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_insertps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:SI 3 "const_0_to_255_operand" "n")]
- UNSPEC_INSERTPS))]
- "TARGET_SSE4_1"
- "insertps\t{%3, %2, %0|%0, %2, %3}";
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
-
-(define_split
- [(set (match_operand:V4SF 0 "memory_operand" "")
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (match_operand:SF 1 "nonmemory_operand" ""))
- (match_dup 0)
- (const_int 1)))]
- "TARGET_SSE && reload_completed"
- [(const_int 0)]
-{
- emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
- DONE;
-})
-
-(define_expand "vec_setv4sf"
- [(match_operand:V4SF 0 "register_operand" "")
- (match_operand:SF 1 "register_operand" "")
- (match_operand 2 "const_int_operand" "")]
- "TARGET_SSE"
-{
- ix86_expand_vector_set (false, operands[0], operands[1],
- INTVAL (operands[2]));
- DONE;
-})
-
-(define_insn_and_split "*vec_extractv4sf_0"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
- (vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
- (parallel [(const_int 0)])))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- rtx op1 = operands[1];
- if (REG_P (op1))
- op1 = gen_rtx_REG (SFmode, REGNO (op1));
- else
- op1 = gen_lowpart (SFmode, op1);
- emit_move_insn (operands[0], op1);
- DONE;
-})
-
-(define_insn "*sse4_1_extractps"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
- (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
- "TARGET_SSE4_1"
- "extractps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
-
-(define_insn_and_split "*vec_extract_v4sf_mem"
- [(set (match_operand:SF 0 "register_operand" "=x*rf")
- (vec_select:SF
- (match_operand:V4SF 1 "memory_operand" "o")
- (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
- ""
- "#"
- "reload_completed"
- [(const_int 0)]
-{
- int i = INTVAL (operands[2]);
-
- emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
- DONE;
-})
-
-(define_expand "vec_extractv4sf"
- [(match_operand:SF 0 "register_operand" "")
- (match_operand:V4SF 1 "register_operand" "")
- (match_operand 2 "const_int_operand" "")]
- "TARGET_SSE"
-{
- ix86_expand_vector_extract (false, operands[0], operands[1],
- INTVAL (operands[2]));
- DONE;
-})
+ (set_attr "mode" "<ssevecmode>")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -2137,485 +1631,179 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Parallel double-precision floating point arithmetic
+;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "negv2df2"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (neg:V2DF (match_operand:V2DF 1 "register_operand" "")))]
- "TARGET_SSE2"
- "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
-
-(define_expand "absv2df2"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (abs:V2DF (match_operand:V2DF 1 "register_operand" "")))]
- "TARGET_SSE2"
- "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
-
-(define_expand "addv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
-
-(define_insn "*addv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
- "addpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmaddv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
- "addsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
-(define_expand "subv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
-
-(define_insn "*subv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "subpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmsubv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2"
- "subsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
-(define_expand "mulv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
-
-(define_insn "*mulv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
- "mulpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssemul")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmmulv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
- "mulsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssemul")
- (set_attr "mode" "DF")])
-
-(define_expand "divv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (div:V2DF (match_operand:V2DF 1 "register_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
-
-(define_insn "*divv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "divpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssediv")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmdivv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2"
- "divsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssediv")
- (set_attr "mode" "DF")])
-
-(define_insn "sqrtv2df2"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "sqrtpd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmsqrtv2df2"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
- (match_operand:V2DF 2 "register_operand" "0")
- (const_int 1)))]
- "TARGET_SSE2"
- "sqrtsd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "DF")])
-
-;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
-;; isn't really correct, as those rtl operators aren't defined when
-;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
-
-(define_expand "smaxv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
-{
- if (!flag_finite_math_only)
- operands[1] = force_reg (V2DFmode, operands[1]);
- ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
-})
-
-(define_insn "*smaxv2df3_finite"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && flag_finite_math_only
- && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
- "maxpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
-
-(define_insn "*smaxv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "maxpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmsmaxv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2"
- "maxsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
-(define_expand "sminv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
-{
- if (!flag_finite_math_only)
- operands[1] = force_reg (V2DFmode, operands[1]);
- ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
-})
+(define_insn "sse_cvtpi2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 3)))]
+ "TARGET_SSE"
+ "cvtpi2ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
-(define_insn "*sminv2df3_finite"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && flag_finite_math_only
- && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
- "minpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
+(define_insn "sse_cvtps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvtps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "DI")])
-(define_insn "*sminv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "minpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
+(define_insn "sse_cvttps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvttps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "SF")])
-(define_insn "sse2_vmsminv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
+(define_insn "sse_cvtsi2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
(const_int 1)))]
- "TARGET_SSE2"
- "minsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
+ "TARGET_SSE"
+ "cvtsi2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "mode" "SF")])
-(define_insn "sse3_addsubv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (plus:V2DF
- (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (minus:V2DF (match_dup 1) (match_dup 2))
+(define_insn "sse_cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
(const_int 1)))]
- "TARGET_SSE3"
- "addsubpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse3_haddv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_concat:V2DF
- (plus:DF
- (vec_select:DF
- (match_operand:V2DF 1 "register_operand" "0")
- (parallel [(const_int 0)]))
- (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
- (plus:DF
- (vec_select:DF
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)]))
- (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
- "TARGET_SSE3"
- "haddpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse3_hsubv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_concat:V2DF
- (minus:DF
- (vec_select:DF
- (match_operand:V2DF 1 "register_operand" "0")
- (parallel [(const_int 0)]))
- (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
- (minus:DF
- (vec_select:DF
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)]))
- (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
- "TARGET_SSE3"
- "hsubpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V2DF")])
-
-(define_expand "reduc_splus_v2df"
- [(match_operand:V2DF 0 "register_operand" "")
- (match_operand:V2DF 1 "register_operand" "")]
- "TARGET_SSE3"
-{
- emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
- DONE;
-})
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "mode" "SF")])
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel double-precision floating point comparisons
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "sse_cvtss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "cvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "SI")])
-(define_insn "sse2_maskcmpv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (match_operator:V2DF 3 "sse_comparison_operator"
- [(match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE2 && !TARGET_SSE5"
- "cmp%D3pd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssecmp")
- (set_attr "mode" "V2DF")])
+(define_insn "sse_cvtss2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "cvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "SI")])
-(define_insn "sse2_maskcmpdf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (match_operator:DF 3 "sse_comparison_operator"
- [(match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE2 && !TARGET_SSE5"
- "cmp%D3sd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssecmp")
- (set_attr "mode" "DF")])
+(define_insn "sse_cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "DI")])
-(define_insn "sse2_vmmaskcmpv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (match_operator:V2DF 3 "sse_comparison_operator"
- [(match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2 && !TARGET_SSE5"
- "cmp%D3sd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssecmp")
- (set_attr "mode" "DF")])
+(define_insn "sse_cvtss2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "DI")])
-(define_insn "sse2_comi"
- [(set (reg:CCFP FLAGS_REG)
- (compare:CCFP
- (vec_select:DF
- (match_operand:V2DF 0 "register_operand" "x")
- (parallel [(const_int 0)]))
- (vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+(define_insn "sse_cvttss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
(parallel [(const_int 0)]))))]
- "TARGET_SSE2"
- "comisd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecomi")
- (set_attr "mode" "DF")])
+ "TARGET_SSE"
+ "cvttss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "SI")])
-(define_insn "sse2_ucomi"
- [(set (reg:CCFPU FLAGS_REG)
- (compare:CCFPU
- (vec_select:DF
- (match_operand:V2DF 0 "register_operand" "x")
- (parallel [(const_int 0)]))
- (vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+(define_insn "sse_cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
(parallel [(const_int 0)]))))]
- "TARGET_SSE2"
- "ucomisd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecomi")
- (set_attr "mode" "DF")])
-
-(define_expand "vcondv2df"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (if_then_else:V2DF
- (match_operator 3 ""
- [(match_operand:V2DF 4 "nonimmediate_operand" "")
- (match_operand:V2DF 5 "nonimmediate_operand" "")])
- (match_operand:V2DF 1 "general_operand" "")
- (match_operand:V2DF 2 "general_operand" "")))]
- "TARGET_SSE2"
-{
- if (ix86_expand_fp_vcond (operands))
- DONE;
- else
- FAIL;
-})
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel double-precision floating point logical operations
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_expand "andv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
-
-(define_insn "*andv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
- "andpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_nandv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "andnpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-(define_expand "iorv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
-
-(define_insn "*iorv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
- "orpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-(define_expand "xorv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
- (match_operand:V2DF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
-
-(define_insn "*xorv2df3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
- "xorpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-;; Also define scalar versions. These are used for abs, neg, and
-;; conditional move. Using subregs into vector modes causes register
-;; allocation lossage. These patterns do not allow memory operands
-;; because the native instructions read the full 128-bits.
-
-(define_insn "*anddf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (and:DF (match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "register_operand" "x")))]
- "TARGET_SSE2"
- "andpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ "TARGET_SSE && TARGET_64BIT"
+ "cvttss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "DI")])
-(define_insn "*nanddf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
- (match_operand:DF 2 "register_operand" "x")))]
+(define_insn "sse2_cvtdq2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
- "andnpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
-(define_insn "*iordf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (ior:DF (match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "register_operand" "x")))]
+(define_insn "sse2_cvtps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
- "orpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ "cvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
-(define_insn "*xordf3"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (xor:DF (match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "register_operand" "x")))]
+(define_insn "sse2_cvttps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
- "xorpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ "cvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -2698,7 +1886,7 @@
(define_insn "sse2_cvtsd2si_2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
- UNSPEC_FIX_NOTRUNC))]
+ UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
"cvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
@@ -2724,7 +1912,7 @@
(define_insn "sse2_cvtsd2siq_2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
- UNSPEC_FIX_NOTRUNC))]
+ UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2 && TARGET_64BIT"
"cvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
@@ -3035,6 +2223,388 @@
DONE;
})
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhlps\t{%2, %0|%0, %2}
+ movlps\t{%H2, %0|%0, %H2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_unpckhps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE"
+ "unpckhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_unpcklps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE"
+ "unpcklps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the new instructions
+(define_insn "sse3_movshdup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1)
+ (const_int 1)
+ (const_int 7)
+ (const_int 7)])))]
+ "TARGET_SSE3"
+ "movshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_movsldup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 0)
+ (const_int 6)
+ (const_int 6)])))]
+ "TARGET_SSE3"
+ "movsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "sse_shufps"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
+
+(define_insn "sse_shufps_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_storehps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "@
+ shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+ movlps\t{%2, %0|%0, %2}
+ movlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_SSE"
+ "shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V4SF")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*sse_concatv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+ (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,SF,DI,DI")])
+
+(define_insn "*sse_concatv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " 0,0")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_SSE"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_expand "vec_initv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "vec_setv4sf_0"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+ (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+;; A subset is vec_setv4sf.
+(define_insn "*vec_setv4sf_sse4_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_SSE4_1"
+ "insertps\t{%3, %2, %0|%0, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_split
+ [(set (match_operand:V4SF 0 "memory_operand" "")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonmemory_operand" ""))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*vec_extractv4sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "*sse4_1_extractps"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "extractps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn_and_split "*vec_extract_v4sf_mem"
+ [(set (match_operand:SF 0 "register_operand" "=x*rf")
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "o")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int i = INTVAL (operands[2]);
+
+ emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
+ DONE;
+})
+
+(define_expand "vec_extractv4sf"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -5847,23 +5417,15 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
-(define_insn "sse_movmskps"
+(define_insn "<sse>_movmskp<ssemodesuffixf2c>"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
- UNSPEC_MOVMSK))]
- "TARGET_SSE"
- "movmskps\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse2_movmskpd"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
- UNSPEC_MOVMSK))]
- "TARGET_SSE2"
- "movmskpd\t{%1, %0|%0, %1}"
+ (unspec:SI
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "sse2_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -6017,7 +5579,12 @@
"monitor"
[(set_attr "length" "3")])
-;; SSSE3
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; SSSE3 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
(define_insn "ssse3_phaddwv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
@@ -6536,7 +6103,7 @@
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")]
- UNSPEC_PSHUFB))]
+ UNSPEC_PSHUFB))]
"TARGET_SSSE3"
"pshufb\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
@@ -6548,7 +6115,7 @@
[(set (match_operand:V8QI 0 "register_operand" "=y")
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")]
- UNSPEC_PSHUFB))]
+ UNSPEC_PSHUFB))]
"TARGET_SSSE3"
"pshufb\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
@@ -6557,9 +6124,10 @@
(define_insn "ssse3_psign<mode>3"
[(set (match_operand:SSEMODE124 0 "register_operand" "=x")
- (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
- (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
- UNSPEC_PSIGN))]
+ (unspec:SSEMODE124
+ [(match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
"TARGET_SSSE3"
"psign<ssevecsize>\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
@@ -6569,9 +6137,10 @@
(define_insn "ssse3_psign<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y")
- (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
- UNSPEC_PSIGN))]
+ (unspec:MMXMODEI
+ [(match_operand:MMXMODEI 1 "register_operand" "0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSIGN))]
"TARGET_SSSE3"
"psign<mmxvecsize>\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
@@ -6583,7 +6152,7 @@
(unspec:TI [(match_operand:TI 1 "register_operand" "0")
(match_operand:TI 2 "nonimmediate_operand" "xm")
(match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
- UNSPEC_PALIGNR))]
+ UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
@@ -6599,7 +6168,7 @@
(unspec:DI [(match_operand:DI 1 "register_operand" "0")
(match_operand:DI 2 "nonimmediate_operand" "ym")
(match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
- UNSPEC_PALIGNR))]
+ UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
@@ -6634,45 +6203,27 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse4a_vmmovntv2df"
- [(set (match_operand:DF 0 "memory_operand" "=m")
- (unspec:DF [(vec_select:DF
- (match_operand:V2DF 1 "register_operand" "x")
- (parallel [(const_int 0)]))]
- UNSPEC_MOVNT))]
- "TARGET_SSE4A"
- "movntsd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "DF")])
-
-(define_insn "sse4a_movntdf"
- [(set (match_operand:DF 0 "memory_operand" "=m")
- (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
+(define_insn "sse4a_movnt<mode>"
+ [(set (match_operand:MODEF 0 "memory_operand" "=m")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
"TARGET_SSE4A"
- "movntsd\t{%1, %0|%0, %1}"
+ "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "DF")])
-
-(define_insn "sse4a_vmmovntv4sf"
- [(set (match_operand:SF 0 "memory_operand" "=m")
- (unspec:SF [(vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
- (parallel [(const_int 0)]))]
- UNSPEC_MOVNT))]
- "TARGET_SSE4A"
- "movntss\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse4a_movntsf"
- [(set (match_operand:SF 0 "memory_operand" "=m")
- (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
+(define_insn "sse4a_vmmovnt<mode>"
+ [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
+ (unspec:<ssescalarmode>
+ [(vec_select:<ssescalarmode>
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (parallel [(const_int 0)]))]
+ UNSPEC_MOVNT))]
"TARGET_SSE4A"
- "movntss\t{%1, %0|%0, %1}"
+ "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "<ssescalarmode>")])
(define_insn "sse4a_extrqi"
[(set (match_operand:V2DI 0 "register_operand" "=x")
@@ -6727,77 +6278,43 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse4_1_blendpd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")
- (match_operand:V2DF 1 "register_operand" "0")
+(define_insn "sse4_1_blendp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SI 3 "const_0_to_3_operand" "n")))]
"TARGET_SSE4_1"
- "blendpd\t{%3, %2, %0|%0, %2, %3}"
+ "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse4_1_blendps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:SI 3 "const_0_to_15_operand" "n")))]
- "TARGET_SSE4_1"
- "blendps\t{%3, %2, %0|%0, %2, %3}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_blendvpd"
- [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
- (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
- (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
- (match_operand:V2DF 3 "register_operand" "Yz")]
- UNSPEC_BLENDV))]
- "TARGET_SSE4_1"
- "blendvpd\t{%3, %2, %0|%0, %2, %3}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse4_1_blendvps"
- [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
- (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
- (match_operand:V4SF 3 "register_operand" "Yz")]
- UNSPEC_BLENDV))]
+(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
+ UNSPEC_BLENDV))]
"TARGET_SSE4_1"
- "blendvps\t{%3, %2, %0|%0, %2, %3}"
+ "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_dppd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")
- (match_operand:SI 3 "const_0_to_255_operand" "n")]
- UNSPEC_DP))]
- "TARGET_SSE4_1"
- "dppd\t{%3, %2, %0|%0, %2, %3}"
- [(set_attr "type" "ssemul")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse4_1_dpps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")
- (match_operand:SI 3 "const_0_to_255_operand" "n")]
- UNSPEC_DP))]
+(define_insn "sse4_1_dpp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
"TARGET_SSE4_1"
- "dpps\t{%3, %2, %0|%0, %2, %3}"
+ "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemul")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "sse4_1_movntdqa"
[(set (match_operand:V2DI 0 "register_operand" "=x")
@@ -7245,55 +6762,32 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
-(define_insn "sse4_1_roundpd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
- (match_operand:SI 2 "const_0_to_15_operand" "n")]
- UNSPEC_ROUND))]
- "TARGET_ROUND"
- "roundpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse4_1_roundps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
- (match_operand:SI 2 "const_0_to_15_operand" "n")]
- UNSPEC_ROUND))]
- "TARGET_ROUND"
- "roundps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_roundsd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
- (match_operand:SI 3 "const_0_to_15_operand" "n")]
- UNSPEC_ROUND)
- (match_operand:V2DF 1 "register_operand" "0")
- (const_int 1)))]
+(define_insn "sse4_1_roundp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
"TARGET_ROUND"
- "roundsd\t{%3, %2, %0|%0, %2, %3}"
+ "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "sse4_1_roundss"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
- (match_operand:SI 3 "const_0_to_15_operand" "n")]
- UNSPEC_ROUND)
- (match_operand:V4SF 1 "register_operand" "0")
+(define_insn "sse4_1_rounds<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
(const_int 1)))]
"TARGET_ROUND"
- "roundss\t{%3, %2, %0|%0, %2, %3}"
+ "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -8347,10 +7841,11 @@
;; SSE5 permute instructions
(define_insn "sse5_pperm"
[(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
- (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
- (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
- (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
- UNSPEC_SSE5_PERMUTE))]
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
+ UNSPEC_SSE5_PERMUTE))]
"TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
"pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")
@@ -8553,7 +8048,7 @@
[(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
UNSPEC_FRCZ))]
"TARGET_SSE5"
- "frcz<ssesuffixf4>\t{%1, %0|%0, %1}"
+ "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "<MODE>")])
@@ -8567,8 +8062,8 @@
UNSPEC_FRCZ)
(match_operand:SSEMODEF2P 1 "register_operand" "0")
(const_int 1)))]
- "TARGET_ROUND"
- "frcz<ssesuffixf2s>\t{%2, %0|%0, %2}"
+ "TARGET_SSE5"
+ "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "<MODE>")])
@@ -8710,10 +8205,11 @@
;; being added here to be complete.
(define_insn "sse5_pcom_tf<mode>3"
[(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
- (unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x")
- (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
- (match_operand:SI 3 "const_int_operand" "n")]
- UNSPEC_SSE5_TRUEFALSE))]
+ (unspec:SSEMODE1248
+ [(match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_SSE5_TRUEFALSE))]
"TARGET_SSE5"
{
return ((INTVAL (operands[3]) != 0)
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index a6ddb8c967f..b5c66622c2a 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -4312,9 +4312,9 @@
[(unspec_volatile [(reg:DI 28)] UNSPEC_BLOCKAGE)]
""
""
- [(set_attr "type" "unknown")
- (set_attr "mode" "none")
- (set_attr "length" "0")])
+ [(set_attr "type" "ghost")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
;; Initialize $gp for RTP PIC. Operand 0 is the __GOTT_BASE__ symbol
;; and operand 1 is the __GOTT_INDEX__ symbol.
@@ -5517,9 +5517,9 @@
[(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)]
""
""
- [(set_attr "type" "unknown")
- (set_attr "mode" "none")
- (set_attr "length" "0")])
+ [(set_attr "type" "ghost")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
(define_expand "epilogue"
[(const_int 2)]
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index 6c88757d24e..f6c9ee3c2e2 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -432,3 +432,6 @@
/* When generating kernel code or kexts, we don't use Altivec by
default, as kernel code doesn't save/restore those registers. */
#define OS_MISSING_ALTIVEC (flag_mkernel || flag_apple_kext)
+
+/* Darwin has to rename some of the long double builtins. */
+#define SUBTARGET_INIT_BUILTINS darwin_patch_builtins ()
diff --git a/gcc/config/rs6000/eabispe.h b/gcc/config/rs6000/eabispe.h
index 0acc17c7022..c3a3f2b4e53 100644
--- a/gcc/config/rs6000/eabispe.h
+++ b/gcc/config/rs6000/eabispe.h
@@ -31,7 +31,7 @@
#define SUBSUBTARGET_OVERRIDE_OPTIONS \
if (rs6000_select[1].string == NULL) \
rs6000_cpu = PROCESSOR_PPC8540; \
- if (!rs6000_explicit_options.abi) \
+ if (!rs6000_explicit_options.spe_abi) \
rs6000_spe_abi = 1; \
if (!rs6000_explicit_options.float_gprs) \
rs6000_float_gprs = 1; \
diff --git a/gcc/config/rs6000/linuxspe.h b/gcc/config/rs6000/linuxspe.h
index 1aea7442c54..c526cf8dce4 100644
--- a/gcc/config/rs6000/linuxspe.h
+++ b/gcc/config/rs6000/linuxspe.h
@@ -30,7 +30,7 @@
#define SUBSUBTARGET_OVERRIDE_OPTIONS \
if (rs6000_select[1].string == NULL) \
rs6000_cpu = PROCESSOR_PPC8540; \
- if (!rs6000_explicit_options.abi) \
+ if (!rs6000_explicit_options.spe_abi) \
rs6000_spe_abi = 1; \
if (!rs6000_explicit_options.float_gprs) \
rs6000_float_gprs = 1; \
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4ebea38388b..9b9fefda2af 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -171,7 +171,7 @@ int rs6000_long_double_type_size;
/* IEEE quad extended precision long double. */
int rs6000_ieeequad;
-/* Whether -mabi=altivec has appeared. */
+/* Nonzero to use AltiVec ABI. */
int rs6000_altivec_abi;
/* Nonzero if we want SPE ABI extensions. */
@@ -262,12 +262,14 @@ int rs6000_alignment_flags;
struct {
bool aix_struct_ret; /* True if -maix-struct-ret was used. */
bool alignment; /* True if -malign- was used. */
- bool abi; /* True if -mabi=spe/nospe was used. */
+ bool spe_abi; /* True if -mabi=spe/no-spe was used. */
+ bool altivec_abi; /* True if -mabi=altivec/no-altivec used. */
bool spe; /* True if -mspe= was used. */
bool float_gprs; /* True if -mfloat-gprs= was used. */
bool isel; /* True if -misel was used. */
bool long_double; /* True if -mlong-double- was used. */
bool ieee; /* True if -mabi=ieee/ibmlongdouble used. */
+ bool vrsave; /* True if -mvrsave was used. */
} rs6000_explicit_options;
struct builtin_description
@@ -667,6 +669,25 @@ struct processor_costs ppc8540_cost = {
1, /* prefetch streams /*/
};
+/* Instruction costs on E300C2 and E300C3 cores. */
+static const
+struct processor_costs ppce300c2c3_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (19), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (4), /* dmul */
+ COSTS_N_INSNS (18), /* sdiv */
+ COSTS_N_INSNS (33), /* ddiv */
+ 32,
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 1, /* prefetch streams /*/
+};
+
/* Instruction costs on POWER4 and POWER5 processors. */
static const
struct processor_costs power4_cost = {
@@ -1418,6 +1439,8 @@ rs6000_override_options (const char *default_cpu)
{"8540", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN},
/* 8548 has a dummy entry for now. */
{"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN},
+ {"e300c2", PROCESSOR_PPCE300C2, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"e300c3", PROCESSOR_PPCE300C3, POWERPC_BASE_MASK},
{"860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
{"970", PROCESSOR_POWER4,
POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
@@ -1524,6 +1547,14 @@ rs6000_override_options (const char *default_cpu)
if (TARGET_E500)
rs6000_isel = 1;
+ if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
+ {
+ if (TARGET_ALTIVEC)
+ error ("AltiVec not supported in this target");
+ if (TARGET_SPE)
+ error ("Spe not supported in this target");
+ }
+
/* If we are optimizing big endian systems for space, use the load/store
multiple and string instructions. */
if (BYTES_BIG_ENDIAN && optimize_size)
@@ -1590,11 +1621,18 @@ rs6000_override_options (const char *default_cpu)
if (TARGET_XCOFF && TARGET_ALTIVEC)
rs6000_altivec_abi = 1;
- /* Set Altivec ABI as default for PowerPC64 Linux. */
- if (TARGET_ELF && TARGET_64BIT)
+ /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
+ PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
+ be explicitly overridden in either case. */
+ if (TARGET_ELF)
{
- rs6000_altivec_abi = 1;
- TARGET_ALTIVEC_VRSAVE = 1;
+ if (!rs6000_explicit_options.altivec_abi
+ && (TARGET_64BIT || TARGET_ALTIVEC))
+ rs6000_altivec_abi = 1;
+
+ /* Enable VRSAVE for AltiVec ABI, unless explicitly overridden. */
+ if (!rs6000_explicit_options.vrsave)
+ TARGET_ALTIVEC_VRSAVE = rs6000_altivec_abi;
}
/* Set the Darwin64 ABI as default for 64-bit Darwin. */
@@ -1638,7 +1676,7 @@ rs6000_override_options (const char *default_cpu)
/* For the powerpc-eabispe configuration, we set all these by
default, so let's unset them if we manually set another
CPU that is not the E500. */
- if (!rs6000_explicit_options.abi)
+ if (!rs6000_explicit_options.spe_abi)
rs6000_spe_abi = 0;
if (!rs6000_explicit_options.spe)
rs6000_spe = 0;
@@ -1836,6 +1874,11 @@ rs6000_override_options (const char *default_cpu)
rs6000_cost = &ppc8540_cost;
break;
+ case PROCESSOR_PPCE300C2:
+ case PROCESSOR_PPCE300C3:
+ rs6000_cost = &ppce300c2c3_cost;
+ break;
+
case PROCESSOR_POWER4:
case PROCESSOR_POWER5:
rs6000_cost = &power4_cost;
@@ -2131,6 +2174,7 @@ rs6000_handle_option (size_t code, const char *arg, int value)
break;
case OPT_mvrsave_:
+ rs6000_explicit_options.vrsave = true;
rs6000_parse_yes_no_option ("vrsave", arg, &(TARGET_ALTIVEC_VRSAVE));
break;
@@ -2188,19 +2232,20 @@ rs6000_handle_option (size_t code, const char *arg, int value)
case OPT_mabi_:
if (!strcmp (arg, "altivec"))
{
- rs6000_explicit_options.abi = true;
+ rs6000_explicit_options.altivec_abi = true;
rs6000_altivec_abi = 1;
+
+ /* Enabling the AltiVec ABI turns off the SPE ABI. */
rs6000_spe_abi = 0;
}
else if (! strcmp (arg, "no-altivec"))
{
- /* ??? Don't set rs6000_explicit_options.abi here, to allow
- the default for rs6000_spe_abi to be chosen later. */
+ rs6000_explicit_options.altivec_abi = true;
rs6000_altivec_abi = 0;
}
else if (! strcmp (arg, "spe"))
{
- rs6000_explicit_options.abi = true;
+ rs6000_explicit_options.spe_abi = true;
rs6000_spe_abi = 1;
rs6000_altivec_abi = 0;
if (!TARGET_SPE_ABI)
@@ -2208,7 +2253,7 @@ rs6000_handle_option (size_t code, const char *arg, int value)
}
else if (! strcmp (arg, "no-spe"))
{
- rs6000_explicit_options.abi = true;
+ rs6000_explicit_options.spe_abi = true;
rs6000_spe_abi = 0;
}
@@ -3619,19 +3664,29 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
/* We accept [reg + reg] and [reg + OFFSET]. */
if (GET_CODE (x) == PLUS)
- {
- rtx op1 = XEXP (x, 0);
- rtx op2 = XEXP (x, 1);
+ {
+ rtx op1 = XEXP (x, 0);
+ rtx op2 = XEXP (x, 1);
+ rtx y;
- op1 = force_reg (Pmode, op1);
+ op1 = force_reg (Pmode, op1);
- if (GET_CODE (op2) != REG
- && (GET_CODE (op2) != CONST_INT
- || !SPE_CONST_OFFSET_OK (INTVAL (op2))))
- op2 = force_reg (Pmode, op2);
+ if (GET_CODE (op2) != REG
+ && (GET_CODE (op2) != CONST_INT
+ || !SPE_CONST_OFFSET_OK (INTVAL (op2))
+ || (GET_MODE_SIZE (mode) > 8
+ && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
+ op2 = force_reg (Pmode, op2);
- return gen_rtx_PLUS (Pmode, op1, op2);
- }
+ /* We can't always do [reg + reg] for these, because [reg +
+ reg + offset] is not a legitimate addressing mode. */
+ y = gen_rtx_PLUS (Pmode, op1, op2);
+
+ if (GET_MODE_SIZE (mode) > 8 && REG_P (op2))
+ return force_reg (Pmode, y);
+ else
+ return y;
+ }
return force_reg (Pmode, x);
}
@@ -9166,6 +9221,10 @@ rs6000_init_builtins (void)
if (built_in_decls [BUILT_IN_CLOG])
set_user_assembler_name (built_in_decls [BUILT_IN_CLOG], "__clog");
#endif
+
+#ifdef SUBTARGET_INIT_BUILTINS
+ SUBTARGET_INIT_BUILTINS;
+#endif
}
/* Search through a set of builtins and enable the mask bits.
@@ -18502,6 +18561,8 @@ rs6000_issue_rate (void)
case CPU_PPC7400:
case CPU_PPC8540:
case CPU_CELL:
+ case CPU_PPCE300C2:
+ case CPU_PPCE300C3:
return 2;
case CPU_RIOS2:
case CPU_PPC604:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6a64eae3dd9..7f7dd57e1e1 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -60,6 +60,18 @@
#define TARGET_PAIRED_FLOAT 0
#endif
+#ifdef HAVE_AS_POPCNTB
+#define ASM_CPU_POWER5_SPEC "-mpower5"
+#else
+#define ASM_CPU_POWER5_SPEC "-mpower4"
+#endif
+
+#ifdef HAVE_AS_DFP
+#define ASM_CPU_POWER6_SPEC "-mpower6 -maltivec"
+#else
+#define ASM_CPU_POWER6_SPEC "-mpower4 -maltivec"
+#endif
+
/* Common ASM definitions used by ASM_SPEC among the various targets
for handling -mcpu=xxx switches. */
#define ASM_CPU_SPEC \
@@ -76,10 +88,10 @@
%{mcpu=power2: -mpwrx} \
%{mcpu=power3: -mppc64} \
%{mcpu=power4: -mpower4} \
-%{mcpu=power5: -mpower4} \
-%{mcpu=power5+: -mpower4} \
-%{mcpu=power6: -mpower4 -maltivec} \
-%{mcpu=power6x: -mpower4 -maltivec} \
+%{mcpu=power5: %(asm_cpu_power5)} \
+%{mcpu=power5+: %(asm_cpu_power5)} \
+%{mcpu=power6: %(asm_cpu_power6) -maltivec} \
+%{mcpu=power6x: %(asm_cpu_power6) -maltivec} \
%{mcpu=powerpc: -mppc} \
%{mcpu=rios: -mpwr} \
%{mcpu=rios1: -mpwr} \
@@ -117,6 +129,8 @@
%{mcpu=G5: -mpower4 -maltivec} \
%{mcpu=8540: -me500} \
%{mcpu=8548: -me500} \
+%{mcpu=e300c2: -me300} \
+%{mcpu=e300c3: -me300} \
%{maltivec: -maltivec} \
-many"
@@ -141,6 +155,8 @@
{ "asm_cpu", ASM_CPU_SPEC }, \
{ "asm_default", ASM_DEFAULT_SPEC }, \
{ "cc1_cpu", CC1_CPU_SPEC }, \
+ { "asm_cpu_power5", ASM_CPU_POWER5_SPEC }, \
+ { "asm_cpu_power6", ASM_CPU_POWER6_SPEC }, \
SUBTARGET_EXTRA_SPECS
/* -mcpu=native handling only makes sense with compiler running on
@@ -262,6 +278,8 @@ enum processor_type
PROCESSOR_PPC7400,
PROCESSOR_PPC7450,
PROCESSOR_PPC8540,
+ PROCESSOR_PPCE300C2,
+ PROCESSOR_PPCE300C3,
PROCESSOR_POWER4,
PROCESSOR_POWER5,
PROCESSOR_POWER6,
@@ -596,6 +614,7 @@ extern enum rs6000_nop_insertion rs6000_sched_insert_nops;
Make vector constants quadword aligned. */
#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
(TREE_CODE (EXP) == STRING_CST \
+ && (STRICT_ALIGNMENT || !optimize_size) \
&& (ALIGN) < BITS_PER_WORD \
? BITS_PER_WORD \
: (ALIGN))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d1b43dc78af..777a1ecf46d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -133,7 +133,7 @@
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in rs6000.h.
-(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6,cell"
+(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,power4,power5,power6,cell"
(const (symbol_ref "rs6000_cpu_attr")))
@@ -166,6 +166,7 @@
(include "7xx.md")
(include "7450.md")
(include "8540.md")
+(include "e300c2c3.md")
(include "power4.md")
(include "power5.md")
(include "power6.md")
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 4088ef73cdf..dccbc1ea350 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -3838,6 +3838,7 @@ find_barrier (int num_mova, rtx mova, rtx from)
rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
int si_limit;
int hi_limit;
+ rtx orig = from;
/* For HImode: range is 510, add 4 because pc counts from address of
second instruction after this one, subtract 2 for the jump instruction
@@ -3897,6 +3898,7 @@ find_barrier (int num_mova, rtx mova, rtx from)
if (GET_CODE (from) == BARRIER)
{
+ rtx next;
found_barrier = from;
@@ -3905,6 +3907,14 @@ find_barrier (int num_mova, rtx mova, rtx from)
this kind of barrier. */
if (barrier_align (from) > 2)
good_barrier = from;
+
+ /* If we are at the end of a hot/cold block, dump the constants
+ here. */
+ next = NEXT_INSN (from);
+ if (next
+ && NOTE_P (next)
+ && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+ break;
}
if (broken_move (from))
@@ -4061,7 +4071,8 @@ find_barrier (int num_mova, rtx mova, rtx from)
/* If we exceeded the range, then we must back up over the last
instruction we looked at. Otherwise, we just need to undo the
NEXT_INSN at the end of the loop. */
- if (count_hi > hi_limit || count_si > si_limit)
+ if (PREV_INSN (from) != orig
+ && (count_hi > hi_limit || count_si > si_limit))
from = PREV_INSN (PREV_INSN (from));
else
from = PREV_INSN (from);
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index d7e2727adac..5a580b517cd 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -7378,7 +7378,7 @@ label:
(define_insn "jump_compact"
[(set (pc)
(label_ref (match_operand 0 "" "")))]
- "TARGET_SH1"
+ "TARGET_SH1 && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
"*
{
/* The length is 16 if the delay slot is unfilled. */
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index f1bfdaff607..14854606b8d 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -4455,3 +4455,179 @@ selb\t%0,%4,%0,%3"
DONE;
}")
+
+(define_expand "vec_unpacku_hi_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+ ""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+ 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+ 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+ ""
+{
+ rtx tmp1 = gen_reg_rtx (V8HImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+ 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xshw (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+ rtx tmp1 = gen_reg_rtx (V8HImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+ 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xshw (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+DONE;
+})
+
+(define_expand "vec_unpacku_hi_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+ (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+ ""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+ 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+ (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+ 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+ (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+ rtx tmp1 = gen_reg_rtx (V16QImode);
+ rtx tmp2 = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+ 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xsbh (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+ (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+""
+{
+ rtx tmp1 = gen_reg_rtx (V16QImode);
+ rtx tmp2 = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+ 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xsbh (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+DONE;
+})
+
+
+
+