2008-02-26 Basile Starynkevitch <basile@starynkevitch.net>

MELT branch merged with trunk r132671

Merged revisions 132452-132671 via svnmerge from
svn+ssh://bstarynk@gcc.gnu.org/svn/gcc/trunk

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@132672 138bc75d-0d04-0410-961f-82ee72b054a4
author: bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> 2008-02-26 13:09:58 +0000
committer: bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> 2008-02-26 13:09:58 +0000
commit: b8053af55de78a3f080783e5113fd6452e5a43c5 (patch)
tree: a5906142e844e296abb7382e34657faf4e58f74f /gcc/config
parent: 4896274c9597b09d4c61bdd2efb3201a72634b3c (diff)
download: gcc-b8053af55de78a3f080783e5113fd6452e5a43c5.tar.gz
24 files changed, 1736 insertions, 1857 deletions
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S
index 397778b82d9..8fdba55f775 100644
--- a/gcc/config/avr/libgcc.S
+++ b/gcc/config/avr/libgcc.S
@@ -32,6 +32,7 @@ Boston, MA 02110-1301, USA.  */
 #define __SREG__ 0x3f
 #define __SP_H__ 0x3e
 #define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
 
 /* Most of the functions here are called directly from avr.md
    patterns, instead of using the standard libcall mechanisms.
@@ -686,20 +687,54 @@ __tablejump__:
 	.endfunc
 #endif /* defined (L_tablejump) */
 
-/* __do_copy_data is only necessary if there is anything in .data section.
-   Does not use RAMPZ - crt*.o provides a replacement for >64K devices.  */
-
 #ifdef L_copy_data
 	.section .init4,"ax",@progbits
 	.global __do_copy_data
 __do_copy_data:
+#if defined(__AVR_HAVE_ELPMX__)
+	ldi	r17, hi8(__data_end)
+	ldi	r26, lo8(__data_start)
+	ldi	r27, hi8(__data_start)
+	ldi	r30, lo8(__data_load_start)
+	ldi	r31, hi8(__data_load_start)
+	ldi	r16, hh8(__data_load_start)
+	out	__RAMPZ__, r16
+	rjmp	.L__do_copy_data_start
+.L__do_copy_data_loop:
+	elpm	r0, Z+
+	st	X+, r0
+.L__do_copy_data_start:
+	cpi	r26, lo8(__data_end)
+	cpc	r27, r17
+	brne	.L__do_copy_data_loop
+#elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+	ldi	r17, hi8(__data_end)
+	ldi	r26, lo8(__data_start)
+	ldi	r27, hi8(__data_start)
+	ldi	r30, lo8(__data_load_start)
+	ldi	r31, hi8(__data_load_start)
+	ldi	r16, hh8(__data_load_start - 0x10000)
+.L__do_copy_data_carry:
+	inc	r16
+	out	__RAMPZ__, r16
+	rjmp	.L__do_copy_data_start
+.L__do_copy_data_loop:
+	elpm
+	st	X+, r0
+	adiw	r30, 1
+	brcs	.L__do_copy_data_carry
+.L__do_copy_data_start:
+	cpi	r26, lo8(__data_end)
+	cpc	r27, r17
+	brne	.L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
 	ldi	r17, hi8(__data_end)
 	ldi	r26, lo8(__data_start)
 	ldi	r27, hi8(__data_start)
 	ldi	r30, lo8(__data_load_start)
 	ldi	r31, hi8(__data_load_start)
-	rjmp	.do_copy_data_start
-.do_copy_data_loop:
+	rjmp	.L__do_copy_data_start
+.L__do_copy_data_loop:
 #if defined (__AVR_HAVE_LPMX__)
 	lpm	r0, Z+
 #else
@@ -707,10 +742,11 @@ __do_copy_data:
 	adiw	r30, 1
 #endif
 	st	X+, r0
-.do_copy_data_start:
+.L__do_copy_data_start:
 	cpi	r26, lo8(__data_end)
 	cpc	r27, r17
-	brne	.do_copy_data_loop
+	brne	.L__do_copy_data_loop
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
 #endif /* L_copy_data */
 
 /* __do_clear_bss is only necessary if there is anything in .bss section.  */
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 9e59b88216b..b8f4ae56713 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -600,6 +600,7 @@ enum reg_class
 #define REG_CLASS_FROM_LETTER(C)		\
   (						\
    (C) == 'a' ? ACR_REGS :			\
+   (C) == 'b' ? GENNONACR_REGS :		\
    (C) == 'h' ? MOF_REGS :			\
    (C) == 'x' ? SPECIAL_REGS :			\
    (C) == 'c' ? CC0_REGS :			\
diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
index a8ce17c0111..c894bf05104 100644
--- a/gcc/config/darwin-protos.h
+++ b/gcc/config/darwin-protos.h
@@ -89,3 +89,4 @@ extern void darwin_cpp_builtins (struct cpp_reader *);
 extern void darwin_asm_output_anchor (rtx symbol);
 extern bool darwin_kextabi_p (void);
 extern void darwin_override_options (void);
+extern void darwin_patch_builtins (void);
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index 307698d095f..13aa021f4c6 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -1735,4 +1735,52 @@ darwin_override_options (void)
     flag_var_tracking_uninit = 1;
 }
 
+/* Add the $LDBL128 suffix (and a "_" prefix) to long double builtin FNCODE.  */
+
+static void
+darwin_patch_builtin (int fncode)
+{
+  tree fn = built_in_decls[fncode];
+  tree sym;
+  char *newname;
+
+  if (!fn)
+    return;
+
+  sym = DECL_ASSEMBLER_NAME (fn);
+  newname = alloca (IDENTIFIER_LENGTH (sym) + 10);
+  strcpy (newname, "_");
+  strcat (newname, IDENTIFIER_POINTER (sym));
+  strcat (newname, "$LDBL128");
+  set_user_assembler_name (fn, newname);
+  /* NEWNAME was sized exactly: 1 byte for "_", 8 for "$LDBL128", 1 for
+     the terminating NUL.  Reuse it for the implicit decl, if any.  */
+
+  fn = implicit_built_in_decls[fncode];
+  if (fn)
+    set_user_assembler_name (fn, newname);
+  /* A null implicit decl is simply left alone.  */
+}
+
+void
+darwin_patch_builtins (void)
+{
+  if (LONG_DOUBLE_TYPE_SIZE != 128)
+    return;  /* Renaming only applies to 128-bit long double.  */
+  /* The .def file below is expected to invoke these macros per builtin.  */
+#define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode);
+#define PATCH_BUILTIN_NO64(fncode) \
+  if (!TARGET_64BIT) \
+    darwin_patch_builtin (fncode);
+#define PATCH_BUILTIN_VARIADIC(fncode) \
+  if (!TARGET_64BIT \
+      && (strverscmp (darwin_macosx_version_min, "10.3.9") >= 0)) \
+    darwin_patch_builtin (fncode);
+#include "darwin-ppc-ldouble-patch.def"
+#undef PATCH_BUILTIN
+#undef PATCH_BUILTIN_NO64
+#undef PATCH_BUILTIN_VARIADIC
+}
+
+
 #include "gt-darwin.h"
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index 96b6311ce7c..f90bd414735 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -930,7 +930,7 @@ h8300_expand_epilogue (void)
     }
 
   if (!returned_p)
-    emit_insn (gen_rtx_RETURN (VOIDmode));
+    emit_jump_insn (gen_rtx_RETURN (VOIDmode));
 }
 
 /* Return nonzero if the current function is an interrupt
diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md
index 08a8d2e9313..9b6c0aa4e16 100644
--- a/gcc/config/h8300/h8300.md
+++ b/gcc/config/h8300/h8300.md
@@ -3282,6 +3282,9 @@
   if (GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[3]) == MEM)
     FAIL;
+
+  if (GET_CODE (operands[3]) != REG)
+    operands[3] = force_reg (HImode, operands[3]);
 }")
 
 (define_insn ""
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5dad2fcf515..5a4456d912b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12148,6 +12148,7 @@ ix86_expand_branch (enum rtx_code code, rtx label)
 		  ix86_expand_branch (code, label);
 		  return;
 		}
+	      break;
 	    case LE: case LEU: case GT: case GTU:
 	      if (lo[1] == constm1_rtx)
 		{
@@ -12156,6 +12157,7 @@ ix86_expand_branch (enum rtx_code code, rtx label)
 		  ix86_expand_branch (code, label);
 		  return;
 		}
+	      break;
 	    default:
 	      break;
 	    }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index fedac5643a4..f2429846691 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1454,6 +1454,9 @@ enum reg_class
 #define SSE_FLOAT_MODE_P(MODE) \
   ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
 
+#define SSE_VEC_FLOAT_MODE_P(MODE) \
+  ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
+
 #define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
 #define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3c9c2cc8c86..92a37280f5f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -13964,25 +13964,15 @@
 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
 ;; it directly.
 
-(define_insn "*sse_setccsf"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(match_operator:SF 1 "sse_comparison_operator"
-	  [(match_operand:SF 2 "register_operand" "0")
-	   (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE && !TARGET_SSE5"
-  "cmp%D1ss\t{%3, %0|%0, %3}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
-
-(define_insn "*sse_setccdf"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(match_operator:DF 1 "sse_comparison_operator"
-	  [(match_operand:DF 2 "register_operand" "0")
-	   (match_operand:DF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE2 && !TARGET_SSE5"
-  "cmp%D1sd\t{%3, %0|%0, %3}"
+(define_insn "*sse_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 1 "sse_comparison_operator"
+	  [(match_operand:MODEF 2 "register_operand" "0")
+	   (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+  "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*sse5_setcc<mode>"
   [(set (match_operand:MODEF 0 "register_operand" "=x")
@@ -19383,7 +19373,7 @@
 			 (match_operand:DI 2 "general_operand" "")
 			 (match_operand:DI 3 "general_operand" "")))]
   "TARGET_64BIT"
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 (define_insn "x86_movdicc_0_m1_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -19437,7 +19427,7 @@
 			 (match_operand:SI 2 "general_operand" "")
 			 (match_operand:SI 3 "general_operand" "")))]
   ""
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
 ;; the register first winds up with `sbbl $0,reg', which is also weird.
@@ -19495,7 +19485,7 @@
 			 (match_operand:HI 2 "general_operand" "")
 			 (match_operand:HI 3 "general_operand" "")))]
   "TARGET_HIMODE_MATH"
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 (define_insn "*movhicc_noc"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
@@ -19517,7 +19507,7 @@
 			 (match_operand:QI 2 "general_operand" "")
 			 (match_operand:QI 3 "general_operand" "")))]
   "TARGET_QIMODE_MATH"
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 (define_insn_and_split "*movqicc_noc"
   [(set (match_operand:QI 0 "register_operand" "=r,r")
@@ -19539,13 +19529,15 @@
   [(set_attr "type" "icmov")
    (set_attr "mode" "SI")])
 
-(define_expand "movsfcc"
-  [(set (match_operand:SF 0 "register_operand" "")
-	(if_then_else:SF (match_operand 1 "comparison_operator" "")
-			 (match_operand:SF 2 "register_operand" "")
-			 (match_operand:SF 3 "register_operand" "")))]
-  "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
-  "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
+(define_expand "mov<mode>cc"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(if_then_else:X87MODEF
+	  (match_operand 1 "comparison_operator" "")
+	  (match_operand:X87MODEF 2 "register_operand" "")
+	  (match_operand:X87MODEF 3 "register_operand" "")))]
+  "(TARGET_80387 && TARGET_CMOVE)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
 
 (define_insn "*movsfcc_1_387"
   [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
@@ -19563,14 +19555,6 @@
   [(set_attr "type" "fcmov,fcmov,icmov,icmov")
    (set_attr "mode" "SF,SF,SI,SI")])
 
-(define_expand "movdfcc"
-  [(set (match_operand:DF 0 "register_operand" "")
-	(if_then_else:DF (match_operand 1 "comparison_operator" "")
-			 (match_operand:DF 2 "register_operand" "")
-			 (match_operand:DF 3 "register_operand" "")))]
-  "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
-  "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-
 (define_insn "*movdfcc_1"
   [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
 	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
@@ -19623,14 +19607,6 @@
    split_di (operands+3, 1, operands+7, operands+8);
    split_di (operands, 1, operands+2, operands+3);")
 
-(define_expand "movxfcc"
-  [(set (match_operand:XF 0 "register_operand" "")
-	(if_then_else:XF (match_operand 1 "comparison_operator" "")
-			 (match_operand:XF 2 "register_operand" "")
-			 (match_operand:XF 3 "register_operand" "")))]
-  "TARGET_80387 && TARGET_CMOVE"
-  "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-
 (define_insn "*movxfcc_1"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
 	(if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
@@ -19663,41 +19639,25 @@
 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
 ;; are undefined in this condition, we're certain this is correct.
 
-(define_insn "sminsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
-		 (match_operand:SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE_MATH"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "smaxsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
-		 (match_operand:SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE_MATH"
-  "maxss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "smindf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
-		 (match_operand:DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "minsd\t{%2, %0|%0, %2}"
+(define_insn "smin<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(smin:MODEF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+	  (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "smaxdf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
-		 (match_operand:DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "maxsd\t{%2, %0|%0, %2}"
+(define_insn "smax<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(smax:MODEF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+	  (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 ;; These versions of the min/max patterns implement exactly the operations
 ;;   min = (op1 < op2 ? op1 : op2)
@@ -19705,45 +19665,27 @@
 ;; Their operands are not commutative, and thus they may be used in the
 ;; presence of -0.0 and NaN.
 
-(define_insn "*ieee_sminsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(unspec:SF [(match_operand:SF 1 "register_operand" "0")
-		    (match_operand:SF 2 "nonimmediate_operand" "xm")]
-		   UNSPEC_IEEE_MIN))]
-  "TARGET_SSE_MATH"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "*ieee_smaxsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(unspec:SF [(match_operand:SF 1 "register_operand" "0")
-		    (match_operand:SF 2 "nonimmediate_operand" "xm")]
-		   UNSPEC_IEEE_MAX))]
-  "TARGET_SSE_MATH"
-  "maxss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "*ieee_smindf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(unspec:DF [(match_operand:DF 1 "register_operand" "0")
-		    (match_operand:DF 2 "nonimmediate_operand" "xm")]
-		   UNSPEC_IEEE_MIN))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "minsd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smin<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MIN))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*ieee_smaxdf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(unspec:DF [(match_operand:DF 1 "register_operand" "0")
-		    (match_operand:DF 2 "nonimmediate_operand" "xm")]
-		   UNSPEC_IEEE_MAX))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "maxsd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smax<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MAX))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
@@ -19778,7 +19720,7 @@
    (match_operand:QI 2 "register_operand" "")
    (match_operand:QI 3 "const_int_operand" "")]
   ""
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
 
 (define_expand "addhicc"
   [(match_operand:HI 0 "register_operand" "")
@@ -19786,7 +19728,7 @@
    (match_operand:HI 2 "register_operand" "")
    (match_operand:HI 3 "const_int_operand" "")]
   ""
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
 
 (define_expand "addsicc"
   [(match_operand:SI 0 "register_operand" "")
@@ -19794,7 +19736,7 @@
    (match_operand:SI 2 "register_operand" "")
    (match_operand:SI 3 "const_int_operand" "")]
   ""
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
 
 (define_expand "adddicc"
   [(match_operand:DI 0 "register_operand" "")
@@ -19802,7 +19744,7 @@
    (match_operand:DI 2 "register_operand" "")
    (match_operand:DI 3 "const_int_operand" "")]
   "TARGET_64BIT"
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
 
 
 ;; Misc patterns (?)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ee819936f6e..3371161f82f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -63,9 +63,9 @@
 
 (define_insn "*mov<mode>_internal_rex64"
   [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
-				"=rm,r,*y,*y ,m ,*y,Y2,x,x ,m,r,x")
+				"=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x")
 	(match_operand:MMXMODEI 1 "vector_move_operand"
-				"Cr ,m,C ,*ym,*y,Y2,*y,C,xm,x,x,r"))]
+				"Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))]
   "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
@@ -87,9 +87,9 @@
 
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
-			"=*y,*y ,m ,*y ,*Y2,*Y2,*Y2 ,m  ,*x,*x,*x,m ,?r ,?m")
+			"=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m  ,*x,*x,*x,m ,?r ,?m")
 	(match_operand:MMXMODEI 1 "vector_move_operand"
-			"C  ,*ym,*y,*Y2,*y ,C  ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
+			"C  ,!ym,!y,*Y2,!y ,C  ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
   "TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
@@ -122,9 +122,9 @@
 
 (define_insn "*movv2sf_internal_rex64"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
-				"=rm,r,*y ,*y ,m ,*y,Y2,x,x,x,m,r,x")
+				"=rm,r,!y ,!y ,m ,!y,Y2,x,x,x,m,r,x")
         (match_operand:V2SF 1 "vector_move_operand"
-				"Cr ,m ,C ,*ym,*y,Y2,*y,C,x,m,x,x,r"))]
+				"Cr ,m ,C ,!ym,!y,Y2,!y,C,x,m,x,x,r"))]
   "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
@@ -147,9 +147,9 @@
 
 (define_insn "*movv2sf_internal"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
-			"=*y,*y ,m,*y ,*Y2,*x,*x,*x,m ,?r ,?m")
+			"=!y,!y ,m,!y ,*Y2,*x,*x,*x,m ,?r ,?m")
         (match_operand:V2SF 1 "vector_move_operand"
-			"C ,*ym,*y,*Y2,*y ,C ,*x,m ,*x,irm,r"))]
+			"C ,!ym,!y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
   "TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
diff --git a/gcc/config/i386/netware-libgcc.exp b/gcc/config/i386/netware-libgcc.exp
index a3498c0e720..309cf754943 100644
--- a/gcc/config/i386/netware-libgcc.exp
+++ b/gcc/config/i386/netware-libgcc.exp
@@ -6,6 +6,8 @@
 	__addvsi3,
 #	__ashldi3,
 #	__ashrdi3,
+	__bswapdi2,
+	__bswapsi2,
 	__clzdi2,
 	__clzsi2,
 	__ctzdi2,
@@ -18,12 +20,18 @@
 	__divsc3,
 #	__divtc3,
 	__divxc3,
+	__emutls_get_address,
+	__emutls_register_common,
 	__ffsdi2,
 	__ffssi2,
 	__fixunsdfdi,
 	__fixunssfdi,
 #	__fixunstfdi,
 	__fixunsxfdi,
+	__floatundisf,
+	__floatundidf,
+#	__floatunditf,
+	__floatundixf,
 	__gcc_bcmp,
 	__gcc_personality_v0,
 #	__lshrdi3,
@@ -64,6 +72,7 @@
 	_Unwind_GetDataRelBase,
 	_Unwind_GetGR,
 	_Unwind_GetIP,
+	_Unwind_GetIPInfo,
 	_Unwind_GetLanguageSpecificData,
 	_Unwind_GetRegionStart,
 	_Unwind_GetTextRelBase,
diff --git a/gcc/config/i386/netware.c b/gcc/config/i386/netware.c
index 0357baff169..63c26cc7551 100644
--- a/gcc/config/i386/netware.c
+++ b/gcc/config/i386/netware.c
@@ -1,6 +1,6 @@
 /* Subroutines for insn-output.c for NetWare.
    Contributed by Jan Beulich (jbeulich@novell.com)
-   Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+   Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -32,28 +32,25 @@ along with GCC; see the file COPYING3.  If not see
 #include "toplev.h"
 #include "ggc.h"
 
-
-/* Return string which is the former assembler name modified with an 
-   underscore prefix and a suffix consisting of an atsign (@) followed
-   by the number of bytes of arguments */
+/* Return string which is the function name, identified by ID, modified
+   with PREFIX and a suffix consisting of an atsign (@) followed by the
+   number of bytes of arguments.  If ID is NULL use the DECL_NAME as base.
+   Return NULL if no change required.  */
 
 static tree
-gen_stdcall_or_fastcall_decoration (tree decl, char prefix)
+gen_stdcall_or_fastcall_decoration (tree decl, tree id, char prefix)
 {
-  unsigned total = 0;
-  /* ??? This probably should use XSTR (XEXP (DECL_RTL (decl), 0), 0) instead
-     of DECL_ASSEMBLER_NAME.  */
-  const char *asmname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
-  char *newsym;
+  unsigned HOST_WIDE_INT total = 0;
+  const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+  char *new_str;
   tree type = TREE_TYPE (decl);
-  tree arg;
-  function_args_iterator args_iter;
 
   if (prototype_p (type))
     {
-      /* These attributes are ignored for variadic functions in
-	 i386.c:ix86_return_pops_args. For compatibility with MS
-	 compiler do not add @0 suffix here.  */ 
+      tree arg;
+      function_args_iterator args_iter;
+
+      /* This attribute is ignored for variadic functions.  */ 
       if (stdarg_p (type))
 	return NULL_TREE;
 
@@ -61,50 +58,50 @@ gen_stdcall_or_fastcall_decoration (tree decl, char prefix)
 	 by convert_arguments in c-typeck.c or cp/typeck.c.  */
       FOREACH_FUNCTION_ARGS(type, arg, args_iter)
 	{
-	  unsigned parm_size;
+	  HOST_WIDE_INT parm_size;
+	  unsigned HOST_WIDE_INT parm_boundary_bytes;
 
 	  if (! COMPLETE_TYPE_P (arg))
 	    break;
 
-	  parm_size = int_size_in_bytes (TYPE_SIZE (arg));
+	  parm_size = int_size_in_bytes (arg);
 	  if (parm_size < 0)
 	    break;
 
+	  parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
 	  /* Must round up to include padding.  This is done the same
 	     way as in store_one_arg.  */
-	  parm_size = ((parm_size + PARM_BOUNDARY - 1)
-		       / PARM_BOUNDARY * PARM_BOUNDARY);
-	  total += parm_size;
+	  total += (parm_size + parm_boundary_bytes - 1)
+		   / parm_boundary_bytes * parm_boundary_bytes;
 	}
     }
 
-  newsym = alloca (1 + strlen (asmname) + 1 + 10 + 1);
-  return get_identifier_with_length (newsym,
-				     sprintf (newsym,
-					      "%c%s@%u",
-					      prefix,
-					      asmname,
-					      total / BITS_PER_UNIT));
+  new_str = alloca (1 + strlen (old_str) + 1 + 10 + 1);
+  sprintf (new_str, "%c%s@" HOST_WIDE_INT_PRINT_UNSIGNED,
+	   prefix, old_str, total);
+
+  return get_identifier (new_str);
 }
 
-/* Return string which is the former assembler name modified with an 
-   _n@ prefix where n represents the number of arguments passed in
-   registers */
+/* Return string which is the function name, identified by ID, modified
+   with an _n@ prefix (where n represents the number of arguments passed in
+   registers).  If ID is NULL use the DECL_NAME as base.
+   Return NULL if no change required.  */
 
 static tree
-gen_regparm_prefix (tree decl, unsigned nregs)
+gen_regparm_prefix (tree decl, tree id, unsigned int nregs)
 {
-  unsigned total = 0;
-  /* ??? This probably should use XSTR (XEXP (DECL_RTL (decl), 0), 0) instead
-     of DECL_ASSEMBLER_NAME.  */
-  const char *asmname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
-  char *newsym;
+  unsigned HOST_WIDE_INT total = 0;
+  const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+  char *new_str;
   tree type = TREE_TYPE (decl);
-  tree arg;
-  function_args_iterator args_iter;
 
   if (prototype_p (type))
     {
+      tree arg;
+      function_args_iterator args_iter;
+
       /* This attribute is ignored for variadic functions.  */ 
       if (stdarg_p (type))
 	return NULL_TREE;
@@ -113,7 +110,8 @@ gen_regparm_prefix (tree decl, unsigned nregs)
 	 by convert_arguments in c-typeck.c or cp/typeck.c.  */
       FOREACH_FUNCTION_ARGS(type, arg, args_iter)
 	{
-	  unsigned parm_size;
+	  HOST_WIDE_INT parm_size;
+	  unsigned HOST_WIDE_INT parm_boundary_bytes;
 
 	  if (! COMPLETE_TYPE_P (arg))
 	    break;
@@ -122,21 +120,58 @@ gen_regparm_prefix (tree decl, unsigned nregs)
 	  if (parm_size < 0)
 	    break;
 
-	  parm_size = ((parm_size + PARM_BOUNDARY - 1)
-		       / PARM_BOUNDARY * PARM_BOUNDARY);
-	  total += parm_size;
+	  parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+	  /* Must round up to include padding.  This is done the same
+	     way as in store_one_arg.  */
+	  total += (parm_size + parm_boundary_bytes - 1)
+		   / parm_boundary_bytes * parm_boundary_bytes;
 	}
     }
 
-  if (nregs > total / BITS_PER_WORD)
-    nregs = total / BITS_PER_WORD;
+  if (nregs > total / UNITS_PER_WORD)
+    nregs = total / UNITS_PER_WORD;
   gcc_assert (nregs <= 9);
-  newsym = alloca (3 + strlen (asmname) + 1);
-  return get_identifier_with_length (newsym,
-				     sprintf (newsym,
-					      "_%u@%s",
-					      nregs,
-					      asmname));
+  new_str = alloca (3 + strlen (old_str) + 1);
+  sprintf (new_str, "_%u@%s", nregs, old_str);
+
+  return get_identifier (new_str);
+}
+
+/* Return the decorated identifier for DECL, built from ID, when its type
+   has the stdcall, fastcall or regparm attribute; else NULL_TREE.  */
+
+static tree
+i386_nlm_maybe_mangle_decl_assembler_name (tree decl, tree id)
+{
+  tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+  tree new_id;
+  /* For regparm, NEW_ID briefly holds the attribute node itself.  */
+  if (lookup_attribute ("stdcall", type_attributes))
+    new_id = gen_stdcall_or_fastcall_decoration (decl, id, '_');
+  else if (lookup_attribute ("fastcall", type_attributes))
+    new_id = gen_stdcall_or_fastcall_decoration (decl, id, FASTCALL_PREFIX);
+  else if ((new_id = lookup_attribute ("regparm", type_attributes)))
+    new_id = gen_regparm_prefix (decl, id,
+		  TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (new_id))));
+  else
+    new_id = NULL_TREE;
+
+  return new_id;
+}
+
+/* Target hook that adjusts DECL_ASSEMBLER_NAME; it is used from the
+   language-independent default hook
+   langhooks.c:lhd_set_decl_assembler_name () and from
+   cp/mangle.c:mangle_decl ().  Returns ID itself when nothing changes.  */
+tree
+i386_nlm_mangle_decl_assembler_name (tree decl, tree id)
+{
+  tree new_id = TREE_CODE (decl) == FUNCTION_DECL
+		? i386_nlm_maybe_mangle_decl_assembler_name (decl, id)
+		: NULL_TREE;
+
+  return (new_id ? new_id : id);
+}
 
 void
@@ -146,31 +181,28 @@ i386_nlm_encode_section_info (tree decl, rtx rtl, int first)
 
   if (first
       && TREE_CODE (decl) == FUNCTION_DECL
+      /* Do not change the identifier if it is a verbatim asmspec
+	 or if the stdcall suffix has already been added.  */
       && *IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) != '*'
       && !strchr (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), '@'))
     {
-      tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
-      tree newid;
-
-      if (lookup_attribute ("stdcall", type_attributes))
-	newid = gen_stdcall_or_fastcall_decoration (decl, '_');
-      else if (lookup_attribute ("fastcall", type_attributes))
-	newid = gen_stdcall_or_fastcall_decoration (decl, FASTCALL_PREFIX);
-      else if ((newid = lookup_attribute ("regparm", type_attributes)) != NULL_TREE)
-	newid = gen_regparm_prefix (decl,
-		      TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (newid))));
-      if (newid != NULL_TREE) 	
-	{
-	  rtx rtlname = XEXP (rtl, 0);
+      /* FIXME: In Ada, and perhaps other language frontends,
+	 imported stdcall names may not yet have been modified.
+	 Check and do it now.  */
+      rtx symbol = XEXP (rtl, 0);
+      tree new_id;
+      tree old_id = DECL_ASSEMBLER_NAME (decl);
+
+      gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
 
-	  if (GET_CODE (rtlname) == MEM)
-	    rtlname = XEXP (rtlname, 0);
-	  XSTR (rtlname, 0) = IDENTIFIER_POINTER (newid);
+      if ((new_id = i386_nlm_maybe_mangle_decl_assembler_name (decl, old_id)))
+	{
 	  /* These attributes must be present on first declaration,
 	     change_decl_assembler_name will warn if they are added
 	     later and the decl has been referenced, but duplicate_decls
-	     should catch the mismatch before this is called.  */ 
-	  change_decl_assembler_name (decl, newid);
+	     should catch the mismatch first.  */
+	  change_decl_assembler_name (decl, new_id);
+	  XSTR (symbol, 0) = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
 	}
     }
 }
@@ -201,3 +233,24 @@ i386_nlm_strip_name_encoding (const char *str)
     }
   return name;
 }
+
+/* Implementation of the `OVERRIDE_OPTIONS' macro for NetWare.  That
+   macro, if defined, is executed once just after all the command
+   options have been parsed, so that combinations of options which do
+   not make sense on a particular target machine can be dealt with.
+
+   Here: run override_options () first, then reject position
+   independent code by reporting an error and clearing flag_pic.
+   Extra `-O' optimizations belong in `OPTIMIZATION_OPTIONS'.  */
+
+void
+netware_override_options (void)
+{
+  override_options ();
+
+  if (flag_pic)
+    {
+      error ("-fPIC and -fpic are not supported for this target");
+      flag_pic = 0;
+    }
+}
diff --git a/gcc/config/i386/netware.h b/gcc/config/i386/netware.h
index e7459a2fe79..d4f31e0bbb2 100644
--- a/gcc/config/i386/netware.h
+++ b/gcc/config/i386/netware.h
@@ -72,6 +72,18 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | \
 	MASK_FLOAT_RETURNS | MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT)
 
+/* Sometimes certain combinations of command options do not make
+   sense on a particular target machine.  You can define a macro
+   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
+   defined, is executed once just after all the command options have
+   been parsed.
+
+   Don't use this macro to turn on various extra optimizations for
+   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
+#undef  OVERRIDE_OPTIONS
+extern void netware_override_options (void);
+#define OVERRIDE_OPTIONS netware_override_options ()
+
 #undef MATH_LIBRARY
 #define MATH_LIBRARY ""
 
@@ -142,13 +154,15 @@ along with GCC; see the file COPYING3.  If not see
    function named by the symbol (such as what section it is in).
 
    On i386 running NetWare, modify the assembler name with an underscore (_)
-   prefix and a suffix consisting of an atsign (@) followed by a string of
-   digits that represents the number of bytes of arguments passed to the
-   function, if it has the attribute STDCALL. Alternatively, if it has the 
-   REGPARM attribute, prefix it with an underscore (_), a digit representing
-   the number of registers used, and an atsign (@). */
+   or atsign (@) prefix and a suffix consisting of an atsign (@) followed by
+   a string of digits that represents the number of bytes of arguments passed
+   to the function, if it has the attribute STDCALL. Alternatively, if it has
+   the REGPARM attribute, prefix it with an underscore (_), a digit
+   representing the number of registers used, and an atsign (@). */
 void i386_nlm_encode_section_info (tree, rtx, int);
+extern tree i386_nlm_mangle_decl_assembler_name (tree, tree);
 const char *i386_nlm_strip_name_encoding (const char *);
 #define SUBTARGET_ENCODE_SECTION_INFO  i386_nlm_encode_section_info
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_nlm_mangle_decl_assembler_name
 #undef  TARGET_STRIP_NAME_ENCODING
 #define TARGET_STRIP_NAME_ENCODING  i386_nlm_strip_name_encoding
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 43f7ced8c03..97250dbd2ed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -36,6 +36,9 @@
 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
 
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
+
 ;; Mapping from integer vector mode to mnemonic suffix
 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
 
@@ -137,12 +140,12 @@
       gcc_unreachable ();
 })
 
-(define_expand "movv4sf"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
-	(match_operand:V4SF 1 "nonimmediate_operand" ""))]
+(define_expand "mov<mode>"
+  [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
+	(match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))]
   "TARGET_SSE"
 {
-  ix86_expand_vector_move (V4SFmode, operands);
+  ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
 })
 
@@ -181,15 +184,6 @@
   operands[2] = CONST0_RTX (V4SFmode);
 })
 
-(define_expand "movv2df"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
-	(match_operand:V2DF 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_move (V2DFmode, operands);
-  DONE;
-})
-
 (define_insn "*movv2df_internal"
   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
 	(match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
@@ -249,23 +243,16 @@
   DONE;
 })
 
-(define_insn "sse_movups"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
-		     UNSPEC_MOVU))]
-  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "movups\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_movupd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-	(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
-		     UNSPEC_MOVU))]
-  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "movupd\t{%1, %0|%0, %1}"
+(define_insn "<sse>_movup<ssemodesuffixf2c>"
+  [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
+	  UNSPEC_MOVU))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "sse2_movdqu"
   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
@@ -277,23 +264,15 @@
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse_movntv4sf"
-  [(set (match_operand:V4SF 0 "memory_operand" "=m")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
-		     UNSPEC_MOVNT))]
-  "TARGET_SSE"
-  "movntps\t{%1, %0|%0, %1}"
+(define_insn "<sse>_movnt<mode>"
+  [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+	  UNSPEC_MOVNT))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse2_movntv2df"
-  [(set (match_operand:V2DF 0 "memory_operand" "=m")
-	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
-		     UNSPEC_MOVNT))]
-  "TARGET_SSE2"
-  "movntpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "sse2_movntv2di"
   [(set (match_operand:V2DI 0 "memory_operand" "=m")
@@ -328,18 +307,20 @@
 ; that directly map to insns are defined; it would be possible to
 ; define patterns for other modes that would expand to several insns.
 
-(define_expand "storentv4sf"
-  [(set (match_operand:V4SF 0 "memory_operand" "")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")]
-		     UNSPEC_MOVNT))]
-  "TARGET_SSE"
+(define_expand "storent<mode>"
+  [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "register_operand" "")]
+	  UNSPEC_MOVNT))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
   "")
 
-(define_expand "storentv2df"
-  [(set (match_operand:V2DF 0 "memory_operand" "")
-	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "")]
-		     UNSPEC_MOVNT))]
-  "TARGET_SSE2"
+(define_expand "storent<mode>"
+  [(set (match_operand:MODEF 0 "memory_operand" "")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "")]
+	  UNSPEC_MOVNT))]
+  "TARGET_SSE4A"
   "")
 
 (define_expand "storentv2di"
@@ -356,121 +337,120 @@
   "TARGET_SSE2"
   "")
 
-(define_expand "storentdf"
-  [(set (match_operand:DF 0 "memory_operand" "")
-	(unspec:DF [(match_operand:DF 1 "register_operand" "")]
-		   UNSPEC_MOVNT))]
-  "TARGET_SSE4A"
-  "")
-
-(define_expand "storentsf"
-  [(set (match_operand:SF 0 "memory_operand" "")
-	(unspec:SF [(match_operand:SF 1 "register_operand" "")]
-		   UNSPEC_MOVNT))]
-  "TARGET_SSE4A"
-  "")
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-;; Parallel single-precision floating point arithmetic
+;; Parallel floating point arithmetic
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_expand "negv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(neg:V4SF (match_operand:V4SF 1 "register_operand" "")))]
-  "TARGET_SSE"
-  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
+(define_expand "neg<mode>2"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(neg:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_expand_fp_absneg_operator (NEG, <MODE>mode, operands); DONE;")
 
-(define_expand "absv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(abs:V4SF (match_operand:V4SF 1 "register_operand" "")))]
-  "TARGET_SSE"
-  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
+(define_expand "abs<mode>2"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(abs:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;")
 
-(define_expand "addv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
+(define_expand "add<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(plus:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
 
-(define_insn "*addv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
-  "addps\t{%2, %0|%0, %2}"
+(define_insn "*add<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(plus:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "addp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_vmaddv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmadd<mode>3"	; scalar add in element 0; other elements come from operand 1
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (plus:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
-  "addss\t{%2, %0|%0, %2}"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "adds<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<ssescalarmode>")])
 
-(define_expand "subv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(minus:V4SF (match_operand:V4SF 1 "register_operand" "")
-		    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
+(define_expand "sub<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(minus:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "register_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
 
-(define_insn "*subv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "subps\t{%2, %0|%0, %2}"
+(define_insn "*sub<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(minus:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "subp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_vmsubv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmsub<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (minus:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_SSE"
-  "subss\t{%2, %0|%0, %2}"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "subs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<ssescalarmode>")])
 
-(define_expand "mulv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
+(define_expand "mul<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(mult:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
-(define_insn "*mulv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
-  "mulps\t{%2, %0|%0, %2}"
+(define_insn "*mul<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(mult:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+  "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssemul")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_vmmulv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmmul<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (mult:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
-  "mulss\t{%2, %0|%0, %2}"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+  "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssemul")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "divv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "")
@@ -490,31 +470,40 @@
     }
 })
 
-(define_insn "sse_divv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(div:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "divps\t{%2, %0|%0, %2}"
+(define_expand "divv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+	(div:V2DF (match_operand:V2DF 1 "register_operand" "")
+		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
+
+(define_insn "<sse>_div<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(div:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssediv")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_vmdivv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmdiv<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (div:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_SSE"
-  "divss\t{%2, %0|%0, %2}"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssediv")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<ssescalarmode>")])
 
 (define_insn "sse_rcpv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(unspec:V4SF
-	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+	  [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
   "TARGET_SSE"
   "rcpps\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
@@ -532,6 +521,48 @@
   [(set_attr "type" "sse")
    (set_attr "mode" "SF")])
 
+(define_expand "sqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+{
+  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
+      DONE;
+    }
+})
+
+(define_insn "sse_sqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "sqrtps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sqrtv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+	(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "sqrtpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "<sse>_vmsqrt<mode>2"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (sqrt:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+	  (match_operand:SSEMODEF2P 2 "register_operand" "0")
+	  (const_int 1)))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "<ssescalarmode>")])
+
 (define_expand "rsqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "")
 	(unspec:V4SF
@@ -563,126 +594,101 @@
   [(set_attr "type" "sse")
    (set_attr "mode" "SF")])
 
-(define_expand "sqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-{
-  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
-      && flag_finite_math_only && !flag_trapping_math
-      && flag_unsafe_math_optimizations)
-    {
-      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
-      DONE;
-    }
-})
-
-(define_insn "sse_sqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "sqrtps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_vmsqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
-	  (match_operand:V4SF 2 "register_operand" "0")
-	  (const_int 1)))]
-  "TARGET_SSE"
-  "sqrtss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
 ;; isn't really correct, as those rtl operators aren't defined when
 ;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
 
-(define_expand "smaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
+(define_expand "smin<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(smin:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
 {
   if (!flag_finite_math_only)
-    operands[1] = force_reg (V4SFmode, operands[1]);
-  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+  ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
 })
 
-(define_insn "*smaxv4sf3_finite"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE && flag_finite_math_only
-   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
-  "maxps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+(define_insn "*smin<mode>3_finite"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(smin:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+   && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
+  "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*smaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "maxps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+(define_insn "*smin<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(smin:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_vmsmaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "<sse>_vmsmin<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (smin:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
 	 (match_dup 1)
 	 (const_int 1)))]
-  "TARGET_SSE"
-  "maxss\t{%2, %0|%0, %2}"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "mins<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<ssescalarmode>")])
 
-(define_expand "sminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
+(define_expand "smax<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(smax:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
 {
   if (!flag_finite_math_only)
-    operands[1] = force_reg (V4SFmode, operands[1]);
-  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+  ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
 })
 
-(define_insn "*sminv4sf3_finite"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE && flag_finite_math_only
-   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
-  "minps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+(define_insn "*smax<mode>3_finite"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(smax:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+   && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
+  "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*sminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "minps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+(define_insn "*smax<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(smax:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_vmsminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-	 (match_dup 1)
-	 (const_int 1)))]
-  "TARGET_SSE"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
+(define_insn "<sse>_vmsmax<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (smax:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "0")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "maxs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<ssescalarmode>")])
 
 ;; These versions of the min/max patterns implement exactly the operations
 ;;   min = (op1 < op2 ? op1 : op2)
@@ -690,45 +696,27 @@
 ;; Their operands are not commutative, and thus they may be used in the
 ;; presence of -0.0 and NaN.
 
-(define_insn "*ieee_sminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-		     UNSPEC_IEEE_MIN))]
-  "TARGET_SSE"
-  "minps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "*ieee_smaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-		     UNSPEC_IEEE_MAX))]
-  "TARGET_SSE"
-  "maxps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "*ieee_sminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-		     UNSPEC_IEEE_MIN))]
-  "TARGET_SSE2"
-  "minpd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smin<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MIN))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*ieee_smaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-		     UNSPEC_IEEE_MAX))]
-  "TARGET_SSE2"
-  "maxpd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smax<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MAX))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "sse3_addsubv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -744,6 +732,19 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "V4SF")])
 
+(define_insn "sse3_addsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+	(vec_merge:V2DF
+	  (plus:V2DF
+	    (match_operand:V2DF 1 "register_operand" "0")
+	    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+	  (minus:V2DF (match_dup 1) (match_dup 2))
+	  (const_int 2)))]	; bit 1 only: element 1 = sum, element 0 = difference
+  "TARGET_SSE3"
+  "addsubpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
 (define_insn "sse3_haddv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(vec_concat:V4SF
@@ -771,6 +772,24 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "V4SF")])
 
+(define_insn "sse3_haddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+	(vec_concat:V2DF
+	  (plus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 1 "register_operand" "0")
+	      (parallel [(const_int 0)]))
+	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+	  (plus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+	      (parallel [(const_int 0)]))
+	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_SSE3"
+  "haddpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
 (define_insn "sse3_hsubv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(vec_concat:V4SF
@@ -798,6 +817,24 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "V4SF")])
 
+(define_insn "sse3_hsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+	(vec_concat:V2DF
+	  (minus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 1 "register_operand" "0")
+	      (parallel [(const_int 0)]))
+	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+	  (minus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+	      (parallel [(const_int 0)]))
+	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_SSE3"
+  "hsubpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
 (define_expand "reduc_splus_v4sf"
   [(match_operand:V4SF 0 "register_operand" "")
    (match_operand:V4SF 1 "register_operand" "")]
@@ -814,6 +851,15 @@
   DONE;
 })
 
+(define_expand "reduc_splus_v2df"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand:V2DF 1 "register_operand" "")]
+  "TARGET_SSE3"
+{
+  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
+  DONE;
+})
+
 (define_expand "reduc_smax_v4sf"
   [(match_operand:V4SF 0 "register_operand" "")
    (match_operand:V4SF 1 "register_operand" "")]
@@ -834,80 +880,71 @@
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-;; Parallel single-precision floating point comparisons
+;; Parallel floating point comparisons
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse_maskcmpv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(match_operator:V4SF 3 "sse_comparison_operator"
-		[(match_operand:V4SF 1 "register_operand" "0")
-		 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE && !TARGET_SSE5"
-  "cmp%D3ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_maskcmpsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(match_operator:SF 3 "sse_comparison_operator"
-		[(match_operand:SF 1 "register_operand" "0")
-		 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE && !TARGET_SSE5"
-  "cmp%D3ss\t{%2, %0|%0, %2}"
+(define_insn "<sse>_maskcmp<mode>3"
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
+	(match_operator:SSEMODEF4 3 "sse_comparison_operator"
+		[(match_operand:SSEMODEF4 1 "register_operand" "0")
+		 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
+   && !TARGET_SSE5"
+  "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_vmmaskcmpv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	 (match_operator:V4SF 3 "sse_comparison_operator"
-		[(match_operand:V4SF 1 "register_operand" "0")
-		 (match_operand:V4SF 2 "register_operand" "x")])
+(define_insn "<sse>_vmmaskcmp<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
+		[(match_operand:SSEMODEF2P 1 "register_operand" "0")
+		 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
 	 (match_dup 1)
 	 (const_int 1)))]
-  "TARGET_SSE && !TARGET_SSE5"
-  "cmp%D3ss\t{%2, %0|%0, %2}"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+  "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<ssescalarmode>")])
 
-(define_insn "sse_comi"
+(define_insn "<sse>_comi"
   [(set (reg:CCFP FLAGS_REG)
 	(compare:CCFP
-	  (vec_select:SF
-	    (match_operand:V4SF 0 "register_operand" "x")
+	  (vec_select:MODEF
+	    (match_operand:<ssevecmode> 0 "register_operand" "x")
 	    (parallel [(const_int 0)]))
-	  (vec_select:SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+	  (vec_select:MODEF
+	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)]))))]
-  "TARGET_SSE"
-  "comiss\t{%1, %0|%0, %1}"
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecomi")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_ucomi"
+(define_insn "<sse>_ucomi"
   [(set (reg:CCFPU FLAGS_REG)
 	(compare:CCFPU
-	  (vec_select:SF
-	    (match_operand:V4SF 0 "register_operand" "x")
+	  (vec_select:MODEF
+	    (match_operand:<ssevecmode> 0 "register_operand" "x")
 	    (parallel [(const_int 0)]))
-	  (vec_select:SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+	  (vec_select:MODEF
+	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)]))))]
-  "TARGET_SSE"
-  "ucomiss\t{%1, %0|%0, %1}"
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecomi")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_expand "vcondv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (if_then_else:V4SF
+(define_expand "vcond<mode>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+        (if_then_else:SSEMODEF2P
           (match_operator 3 ""
-            [(match_operand:V4SF 4 "nonimmediate_operand" "")
-             (match_operand:V4SF 5 "nonimmediate_operand" "")])
-          (match_operand:V4SF 1 "general_operand" "")
-          (match_operand:V4SF 2 "general_operand" "")))]
-  "TARGET_SSE"
+            [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
+             (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
+          (match_operand:SSEMODEF2P 1 "general_operand" "")
+          (match_operand:SSEMODEF2P 2 "general_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
 {
   if (ix86_expand_fp_vcond (operands))
     DONE;
@@ -917,666 +954,123 @@
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-;; Parallel single-precision floating point logical operations
+;; Parallel floating point logical operations
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_expand "andv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
+(define_expand "and<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(and:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
 
-(define_insn "*andv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
-  "andps\t{%2, %0|%0, %2}"
+(define_insn "*and<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(and:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+  "andp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse_nandv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
-		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "andnps\t{%2, %0|%0, %2}"
+(define_insn "<sse>_nand<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(and:SSEMODEF2P
+	  (not:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "0"))
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_expand "iorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
+(define_expand "ior<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(ior:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
 
-(define_insn "*iorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
-  "orps\t{%2, %0|%0, %2}"
+(define_insn "*ior<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(ior:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
+  "orp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_expand "xorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-	(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
+(define_expand "xor<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+	(xor:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
 
-(define_insn "*xorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
-  "xorps\t{%2, %0|%0, %2}"
+(define_insn "*xor<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(xor:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+   && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+  "xorp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
 ;; Also define scalar versions.  These are used for abs, neg, and
 ;; conditional move.  Using subregs into vector modes causes register
 ;; allocation lossage.  These patterns do not allow memory operands
 ;; because the native instructions read the full 128-bits.
 
-(define_insn "*andsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(and:SF (match_operand:SF 1 "register_operand" "0")
-		(match_operand:SF 2 "register_operand" "x")))]
-  "TARGET_SSE"
-  "andps\t{%2, %0|%0, %2}"
+(define_insn "*and<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(and:MODEF
+	  (match_operand:MODEF 1 "register_operand" "0")
+	  (match_operand:MODEF 2 "register_operand" "x")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "andp<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "*nandsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
-		(match_operand:SF 2 "register_operand" "x")))]
-  "TARGET_SSE"
-  "andnps\t{%2, %0|%0, %2}"
+   (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*nand<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(and:MODEF
+	  (not:MODEF
+	    (match_operand:MODEF 1 "register_operand" "0"))
+	  (match_operand:MODEF 2 "register_operand" "x")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "*iorsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(ior:SF (match_operand:SF 1 "register_operand" "0")
-		(match_operand:SF 2 "register_operand" "x")))]
-  "TARGET_SSE"
-  "orps\t{%2, %0|%0, %2}"
+   (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*ior<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(ior:MODEF
+	  (match_operand:MODEF 1 "register_operand" "0")
+	  (match_operand:MODEF 2 "register_operand" "x")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "orp<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "*xorsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(xor:SF (match_operand:SF 1 "register_operand" "0")
-		(match_operand:SF 2 "register_operand" "x")))]
-  "TARGET_SSE"
-  "xorps\t{%2, %0|%0, %2}"
+   (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*xor<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(xor:MODEF
+	  (match_operand:MODEF 1 "register_operand" "0")
+	  (match_operand:MODEF 2 "register_operand" "x")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "xorp<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel single-precision floating point conversion operations
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "sse_cvtpi2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (vec_duplicate:V4SF
-	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
-	  (match_operand:V4SF 1 "register_operand" "0")
-	  (const_int 3)))]
-  "TARGET_SSE"
-  "cvtpi2ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-	(vec_select:V2SI
-	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-		       UNSPEC_FIX_NOTRUNC)
-	  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse_cvttps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-	(vec_select:V2SI
-	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
-	  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_cvtsi2ss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-	(vec_merge:V4SF
-	  (vec_duplicate:V4SF
-	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
-	  (match_operand:V4SF 1 "register_operand" "0,0")
-	  (const_int 1)))]
-  "TARGET_SSE"
-  "cvtsi2ss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "amdfam10_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_cvtsi2ssq"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-	(vec_merge:V4SF
-	  (vec_duplicate:V4SF
-	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
-	  (match_operand:V4SF 1 "register_operand" "0,0")
-	  (const_int 1)))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvtsi2ssq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "amdfam10_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_cvtss2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-	(unspec:SI
-	  [(vec_select:SF
-	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
-	     (parallel [(const_int 0)]))]
-	  UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE"
-  "cvtss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "SI")])
-
-(define_insn "sse_cvtss2si_2"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
-	 UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE"
-  "cvtss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "SI")])
-
-(define_insn "sse_cvtss2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(unspec:DI
-	  [(vec_select:SF
-	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
-	     (parallel [(const_int 0)]))]
-	  UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvtss2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse_cvtss2siq_2"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
-	 UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvtss2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse_cvttss2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-	(fix:SI
-	  (vec_select:SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
-	    (parallel [(const_int 0)]))))]
-  "TARGET_SSE"
-  "cvttss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "SI")])
-
-(define_insn "sse_cvttss2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(fix:DI
-	  (vec_select:SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
-	    (parallel [(const_int 0)]))))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvttss2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse2_cvtdq2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvtdq2ps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse2_cvtps2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-		     UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE2"
-  "cvtps2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "prefix_data16" "1")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_cvttps2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvttps2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "TI")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel single-precision floating point element swizzling
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "sse_movhlps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
-	    (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
-	  (parallel [(const_int 6)
-		     (const_int 7)
-		     (const_int 2)
-		     (const_int 3)])))]
-  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "@
-   movhlps\t{%2, %0|%0, %2}
-   movlps\t{%H2, %0|%0, %H2}
-   movhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF,V2SF,V2SF")])
-
-(define_insn "sse_movlhps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
-	    (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
-	  (parallel [(const_int 0)
-		     (const_int 1)
-		     (const_int 4)
-		     (const_int 5)])))]
-  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
-  "@
-   movlhps\t{%2, %0|%0, %2}
-   movhps\t{%2, %0|%0, %2}
-   movlps\t{%2, %H0|%H0, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF,V2SF,V2SF")])
-
-(define_insn "sse_unpckhps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "register_operand" "0")
-	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-	  (parallel [(const_int 2) (const_int 6)
-		     (const_int 3) (const_int 7)])))]
-  "TARGET_SSE"
-  "unpckhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_unpcklps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "register_operand" "0")
-	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-	  (parallel [(const_int 0) (const_int 4)
-		     (const_int 1) (const_int 5)])))]
-  "TARGET_SSE"
-  "unpcklps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-;; These are modeled with the same vec_concat as the others so that we
-;; capture users of shufps that can use the new instructions
-(define_insn "sse3_movshdup"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
-	    (match_dup 1))
-	  (parallel [(const_int 1)
-		     (const_int 1)
-		     (const_int 7)
-		     (const_int 7)])))]
-  "TARGET_SSE3"
-  "movshdup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse3_movsldup"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
-	    (match_dup 1))
-	  (parallel [(const_int 0)
-		     (const_int 0)
-		     (const_int 6)
-		     (const_int 6)])))]
-  "TARGET_SSE3"
-  "movsldup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "prefix_rep" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_expand "sse_shufps"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand:V4SF 1 "register_operand" "")
-   (match_operand:V4SF 2 "nonimmediate_operand" "")
-   (match_operand:SI 3 "const_int_operand" "")]
-  "TARGET_SSE"
-{
-  int mask = INTVAL (operands[3]);
-  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
-			       GEN_INT ((mask >> 0) & 3),
-			       GEN_INT ((mask >> 2) & 3),
-			       GEN_INT (((mask >> 4) & 3) + 4),
-			       GEN_INT (((mask >> 6) & 3) + 4)));
-  DONE;
-})
-
-(define_insn "sse_shufps_1"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "register_operand" "0")
-	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-	  (parallel [(match_operand 3 "const_0_to_3_operand" "")
-		     (match_operand 4 "const_0_to_3_operand" "")
-		     (match_operand 5 "const_4_to_7_operand" "")
-		     (match_operand 6 "const_4_to_7_operand" "")])))]
-  "TARGET_SSE"
-{
-  int mask = 0;
-  mask |= INTVAL (operands[3]) << 0;
-  mask |= INTVAL (operands[4]) << 2;
-  mask |= (INTVAL (operands[5]) - 4) << 4;
-  mask |= (INTVAL (operands[6]) - 4) << 6;
-  operands[3] = GEN_INT (mask);
-
-  return "shufps\t{%3, %2, %0|%0, %2, %3}";
-}
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_storehps"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
-	(vec_select:V2SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
-	  (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_SSE"
-  "@
-   movhps\t{%1, %0|%0, %1}
-   movhlps\t{%1, %0|%0, %1}
-   movlps\t{%H1, %0|%0, %H1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V2SF,V4SF,V2SF")])
-
-(define_insn "sse_loadhps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
-	(vec_concat:V4SF
-	  (vec_select:V2SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
-	    (parallel [(const_int 0) (const_int 1)]))
-	  (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
-  "TARGET_SSE"
-  "@
-   movhps\t{%2, %0|%0, %2}
-   movlhps\t{%2, %0|%0, %2}
-   movlps\t{%2, %H0|%H0, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V2SF,V4SF,V2SF")])
-
-(define_insn "sse_storelps"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
-	(vec_select:V2SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
-	  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "@
-   movlps\t{%1, %0|%0, %1}
-   movaps\t{%1, %0|%0, %1}
-   movlps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V2SF,V4SF,V2SF")])
-
-(define_insn "sse_loadlps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
-	(vec_concat:V4SF
-	  (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
-	  (vec_select:V2SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
-	    (parallel [(const_int 2) (const_int 3)]))))]
-  "TARGET_SSE"
-  "@
-   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
-   movlps\t{%2, %0|%0, %2}
-   movlps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog,ssemov,ssemov")
-   (set_attr "mode" "V4SF,V2SF,V2SF")])
-
-(define_insn "sse_movss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (match_operand:V4SF 2 "register_operand" "x")
-	  (match_operand:V4SF 1 "register_operand" "0")
-	  (const_int 1)))]
-  "TARGET_SSE"
-  "movss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
-
-(define_insn "*vec_dupv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_duplicate:V4SF
-	  (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_SSE"
-  "shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "type" "sselog1")
-   (set_attr "mode" "V4SF")])
-
-;; ??? In theory we can match memory for the MMX alternative, but allowing
-;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
-;; alternatives pretty much forces the MMX alternative to be chosen.
-(define_insn "*sse_concatv2sf"
-  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
-	(vec_concat:V2SF
-	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
-	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
-  "TARGET_SSE"
-  "@
-   unpcklps\t{%2, %0|%0, %2}
-   movss\t{%1, %0|%0, %1}
-   punpckldq\t{%2, %0|%0, %2}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
-   (set_attr "mode" "V4SF,SF,DI,DI")])
-
-(define_insn "*sse_concatv4sf"
-  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
-	(vec_concat:V4SF
-	  (match_operand:V2SF 1 "register_operand" " 0,0")
-	  (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
-  "TARGET_SSE"
-  "@
-   movlhps\t{%2, %0|%0, %2}
-   movhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF,V2SF")])
-
-(define_expand "vec_initv4sf"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand 1 "" "")]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_init (false, operands[0], operands[1]);
-  DONE;
-})
-
-(define_insn "vec_setv4sf_0"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y2,m")
-	(vec_merge:V4SF
-	  (vec_duplicate:V4SF
-	    (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
-	  (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
-	  (const_int 1)))]
-  "TARGET_SSE"
-  "@
-   movss\t{%2, %0|%0, %2}
-   movss\t{%2, %0|%0, %2}
-   movd\t{%2, %0|%0, %2}
-   #"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
-
-;; A subset is vec_setv4sf.
-(define_insn "*vec_setv4sf_sse4_1"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (vec_duplicate:V4SF
-	    (match_operand:SF 2 "nonimmediate_operand" "xm"))
-	  (match_operand:V4SF 1 "register_operand" "0")
-	  (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
-  "TARGET_SSE4_1"
-{
-  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
-  return "insertps\t{%3, %2, %0|%0, %2, %3}";
-}
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_insertps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
-		      (match_operand:V4SF 1 "register_operand" "0")
-		      (match_operand:SI 3 "const_0_to_255_operand" "n")]
-		     UNSPEC_INSERTPS))]
-  "TARGET_SSE4_1"
-  "insertps\t{%3, %2, %0|%0, %2, %3}";
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_split
-  [(set (match_operand:V4SF 0 "memory_operand" "")
-	(vec_merge:V4SF
-	  (vec_duplicate:V4SF
-	    (match_operand:SF 1 "nonmemory_operand" ""))
-	  (match_dup 0)
-	  (const_int 1)))]
-  "TARGET_SSE && reload_completed"
-  [(const_int 0)]
-{
-  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
-  DONE;
-})
-
-(define_expand "vec_setv4sf"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand:SF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_set (false, operands[0], operands[1],
-			  INTVAL (operands[2]));
-  DONE;
-})
-
-(define_insn_and_split "*vec_extractv4sf_0"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
-	(vec_select:SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
-	  (parallel [(const_int 0)])))]
-  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  rtx op1 = operands[1];
-  if (REG_P (op1))
-    op1 = gen_rtx_REG (SFmode, REGNO (op1));
-  else
-    op1 = gen_lowpart (SFmode, op1);
-  emit_move_insn (operands[0], op1);
-  DONE;
-})
-
-(define_insn "*sse4_1_extractps"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
-	(vec_select:SF
-	  (match_operand:V4SF 1 "register_operand" "x")
-	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
-  "TARGET_SSE4_1"
-  "extractps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_insn_and_split "*vec_extract_v4sf_mem"
-  [(set (match_operand:SF 0 "register_operand" "=x*rf")
-       (vec_select:SF
-	 (match_operand:V4SF 1 "memory_operand" "o")
-	 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
-  ""
-  "#"
-  "reload_completed"
-  [(const_int 0)]
-{
-  int i = INTVAL (operands[2]);
-
-  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
-  DONE;
-})
-
-(define_expand "vec_extractv4sf"
-  [(match_operand:SF 0 "register_operand" "")
-   (match_operand:V4SF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_extract (false, operands[0], operands[1],
-			      INTVAL (operands[2]));
-  DONE;
-})
+   (set_attr "mode" "<ssevecmode>")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
@@ -2137,485 +1631,179 @@
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-;; Parallel double-precision floating point arithmetic
+;; Parallel single-precision floating point conversion operations
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_expand "negv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(neg:V2DF (match_operand:V2DF 1 "register_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
-
-(define_expand "absv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(abs:V2DF (match_operand:V2DF 1 "register_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
-
-(define_expand "addv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
-
-(define_insn "*addv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
-  "addpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmaddv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
-  "addsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_expand "subv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
-
-(define_insn "*subv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "subpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_SSE2"
-  "subsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_expand "mulv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
-
-(define_insn "*mulv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
-  "mulpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmmulv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
-  "mulsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "DF")])
-
-(define_expand "divv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(div:V2DF (match_operand:V2DF 1 "register_operand" "")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
-
-(define_insn "*divv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(div:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "divpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmdivv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_SSE2"
-  "divsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "DF")])
-
-(define_insn "sqrtv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "sqrtpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmsqrtv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
-	  (match_operand:V2DF 2 "register_operand" "0")
-	  (const_int 1)))]
-  "TARGET_SSE2"
-  "sqrtsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "DF")])
-
-;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
-;; isn't really correct, as those rtl operators aren't defined when
-;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
-
-(define_expand "smaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-{
-  if (!flag_finite_math_only)
-    operands[1] = force_reg (V2DFmode, operands[1]);
-  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
-})
-
-(define_insn "*smaxv2df3_finite"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && flag_finite_math_only
-   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
-  "maxpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "*smaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "maxpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_vmsmaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_SSE2"
-  "maxsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_expand "sminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-{
-  if (!flag_finite_math_only)
-    operands[1] = force_reg (V2DFmode, operands[1]);
-  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
-})
+(define_insn "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+	  (match_operand:V4SF 1 "register_operand" "0")
+	  (const_int 3)))]
+  "TARGET_SSE"
+  "cvtpi2ps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
 
-(define_insn "*sminv2df3_finite"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && flag_finite_math_only
-   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
-  "minpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
+(define_insn "sse_cvtps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_select:V2SI
+	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+		       UNSPEC_FIX_NOTRUNC)
+	  (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "cvtps2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "minpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
+(define_insn "sse_cvttps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_select:V2SI
+	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+	  (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "cvttps2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx")
+   (set_attr "mode" "SF")])
 
-(define_insn "sse2_vmsminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
-		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
+(define_insn "sse_cvtsi2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+	  (match_operand:V4SF 1 "register_operand" "0,0")
 	  (const_int 1)))]
-  "TARGET_SSE2"
-  "minsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+  "TARGET_SSE"
+  "cvtsi2ss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "vector,double")
+   (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "mode" "SF")])
 
-(define_insn "sse3_addsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (plus:V2DF
-	    (match_operand:V2DF 1 "register_operand" "0")
-	    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-	  (minus:V2DF (match_dup 1) (match_dup 2))
+(define_insn "sse_cvtsi2ssq"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+	  (match_operand:V4SF 1 "register_operand" "0,0")
 	  (const_int 1)))]
-  "TARGET_SSE3"
-  "addsubpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse3_haddv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_concat:V2DF
-	  (plus:DF
-	    (vec_select:DF
-	      (match_operand:V2DF 1 "register_operand" "0")
-	      (parallel [(const_int 0)]))
-	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
-	  (plus:DF
-	    (vec_select:DF
-	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-	      (parallel [(const_int 0)]))
-	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
-  "TARGET_SSE3"
-  "haddpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse3_hsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_concat:V2DF
-	  (minus:DF
-	    (vec_select:DF
-	      (match_operand:V2DF 1 "register_operand" "0")
-	      (parallel [(const_int 0)]))
-	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
-	  (minus:DF
-	    (vec_select:DF
-	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-	      (parallel [(const_int 0)]))
-	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
-  "TARGET_SSE3"
-  "hsubpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "reduc_splus_v2df"
-  [(match_operand:V2DF 0 "register_operand" "")
-   (match_operand:V2DF 1 "register_operand" "")]
-  "TARGET_SSE3"
-{
-  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
-  DONE;
-})
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtsi2ssq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "vector,double")
+   (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "mode" "SF")])
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel double-precision floating point comparisons
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "sse_cvtss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI
+	  [(vec_select:SF
+	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+	     (parallel [(const_int 0)]))]
+	  UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE"
+  "cvtss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "SI")])
 
-(define_insn "sse2_maskcmpv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(match_operator:V2DF 3 "sse_comparison_operator"
-		[(match_operand:V2DF 1 "register_operand" "0")
-		 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE2 && !TARGET_SSE5"
-  "cmp%D3pd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V2DF")])
+(define_insn "sse_cvtss2si_2"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+		   UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE"
+  "cvtss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "SI")])
 
-(define_insn "sse2_maskcmpdf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(match_operator:DF 3 "sse_comparison_operator"
-		[(match_operand:DF 1 "register_operand" "0")
-		 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE2 && !TARGET_SSE5"
-  "cmp%D3sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
+(define_insn "sse_cvtss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI
+	  [(vec_select:SF
+	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+	     (parallel [(const_int 0)]))]
+	  UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse2_vmmaskcmpv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (match_operator:V2DF 3 "sse_comparison_operator"
-		[(match_operand:V2DF 1 "register_operand" "0")
-		 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_SSE2 && !TARGET_SSE5"
-  "cmp%D3sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
+(define_insn "sse_cvtss2siq_2"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+		   UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse2_comi"
-  [(set (reg:CCFP FLAGS_REG)
-	(compare:CCFP
-	  (vec_select:DF
-	    (match_operand:V2DF 0 "register_operand" "x")
-	    (parallel [(const_int 0)]))
-	  (vec_select:DF
-	    (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+(define_insn "sse_cvttss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(fix:SI
+	  (vec_select:SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
 	    (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "comisd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "DF")])
+  "TARGET_SSE"
+  "cvttss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "SI")])
 
-(define_insn "sse2_ucomi"
-  [(set (reg:CCFPU FLAGS_REG)
-	(compare:CCFPU
-	  (vec_select:DF
-	    (match_operand:V2DF 0 "register_operand" "x")
-	    (parallel [(const_int 0)]))
-	  (vec_select:DF
-	    (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+(define_insn "sse_cvttss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(fix:DI
+	  (vec_select:SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
 	    (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "ucomisd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "DF")])
-
-(define_expand "vcondv2df"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (if_then_else:V2DF
-          (match_operator 3 ""
-            [(match_operand:V2DF 4 "nonimmediate_operand" "")
-             (match_operand:V2DF 5 "nonimmediate_operand" "")])
-          (match_operand:V2DF 1 "general_operand" "")
-          (match_operand:V2DF 2 "general_operand" "")))]
-  "TARGET_SSE2"
-{
-  if (ix86_expand_fp_vcond (operands))
-    DONE;
-  else
-    FAIL;
-})
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel double-precision floating point logical operations
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_expand "andv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
-
-(define_insn "*andv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
-  "andpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_nandv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
-		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "andnpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "iorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
-
-(define_insn "*iorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
-  "orpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "xorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-	(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
-
-(define_insn "*xorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
-  "xorpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-;; Also define scalar versions.  These are used for abs, neg, and
-;; conditional move.  Using subregs into vector modes causes register
-;; allocation lossage.  These patterns do not allow memory operands
-;; because the native instructions read the full 128-bits.
-
-(define_insn "*anddf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(and:DF (match_operand:DF 1 "register_operand" "0")
-		(match_operand:DF 2 "register_operand" "x")))]
-  "TARGET_SSE2"
-  "andpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
+  "TARGET_SSE && TARGET_64BIT"
+  "cvttss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "DI")])
 
-(define_insn "*nanddf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
-		(match_operand:DF 2 "register_operand" "x")))]
+(define_insn "sse2_cvtdq2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
-  "andnpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
+  "cvtdq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
 
-(define_insn "*iordf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(ior:DF (match_operand:DF 1 "register_operand" "0")
-		(match_operand:DF 2 "register_operand" "x")))]
+(define_insn "sse2_cvtps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+		     UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2"
-  "orpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
+  "cvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix_data16" "1")
+   (set_attr "mode" "TI")])
 
-(define_insn "*xordf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(xor:DF (match_operand:DF 1 "register_operand" "0")
-		(match_operand:DF 2 "register_operand" "x")))]
+(define_insn "sse2_cvttps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
-  "xorpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
+  "cvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "TI")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
@@ -2698,7 +1886,7 @@
 (define_insn "sse2_cvtsd2si_2"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
-	 UNSPEC_FIX_NOTRUNC))]
+		   UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2"
   "cvtsd2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
@@ -2724,7 +1912,7 @@
 (define_insn "sse2_cvtsd2siq_2"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
-	 UNSPEC_FIX_NOTRUNC))]
+		   UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2 && TARGET_64BIT"
   "cvtsd2siq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
@@ -3035,6 +2223,388 @@
   DONE;
 })
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse_movhlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+	    (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+	  (parallel [(const_int 6)
+		     (const_int 7)
+		     (const_int 2)
+		     (const_int 3)])))]
+  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   movhlps\t{%2, %0|%0, %2}
+   movlps\t{%H2, %0|%0, %H2}
+   movhps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movlhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+	    (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+	  (parallel [(const_int 0)
+		     (const_int 1)
+		     (const_int 4)
+		     (const_int 5)])))]
+  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+  "@
+   movlhps\t{%2, %0|%0, %2}
+   movhps\t{%2, %0|%0, %2}
+   movlps\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_unpckhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "register_operand" "0")
+	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	  (parallel [(const_int 2) (const_int 6)
+		     (const_int 3) (const_int 7)])))]
+  "TARGET_SSE"
+  "unpckhps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse_unpcklps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "register_operand" "0")
+	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 1) (const_int 5)])))]
+  "TARGET_SSE"
+  "unpcklps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the new instructions
+(define_insn "sse3_movshdup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+	    (match_dup 1))
+	  (parallel [(const_int 1)
+		     (const_int 1)
+		     (const_int 7)
+		     (const_int 7)])))]
+  "TARGET_SSE3"
+  "movshdup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_movsldup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+	    (match_dup 1))
+	  (parallel [(const_int 0)
+		     (const_int 0)
+		     (const_int 6)
+		     (const_int 6)])))]
+  "TARGET_SSE3"
+  "movsldup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "V4SF")])
+
+(define_expand "sse_shufps"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:V4SF 1 "register_operand" "")
+   (match_operand:V4SF 2 "nonimmediate_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  int mask = INTVAL (operands[3]);
+  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
+			       GEN_INT ((mask >> 0) & 3),
+			       GEN_INT ((mask >> 2) & 3),
+			       GEN_INT (((mask >> 4) & 3) + 4),
+			       GEN_INT (((mask >> 6) & 3) + 4)));
+  DONE;
+})
+
+(define_insn "sse_shufps_1"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "register_operand" "0")
+	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	  (parallel [(match_operand 3 "const_0_to_3_operand" "")
+		     (match_operand 4 "const_0_to_3_operand" "")
+		     (match_operand 5 "const_4_to_7_operand" "")
+		     (match_operand 6 "const_4_to_7_operand" "")])))]
+  "TARGET_SSE"
+{
+  int mask = 0;
+  mask |= INTVAL (operands[3]) << 0;
+  mask |= INTVAL (operands[4]) << 2;
+  mask |= (INTVAL (operands[5]) - 4) << 4;
+  mask |= (INTVAL (operands[6]) - 4) << 6;
+  operands[3] = GEN_INT (mask);
+
+  return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse_storehps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+	(vec_select:V2SF
+	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+	  (parallel [(const_int 2) (const_int 3)])))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%1, %0|%0, %1}
+   movhlps\t{%1, %0|%0, %1}
+   movlps\t{%H1, %0|%0, %H1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+	(vec_concat:V4SF
+	  (vec_select:V2SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+	    (parallel [(const_int 0) (const_int 1)]))
+	  (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%2, %0|%0, %2}
+   movlhps\t{%2, %0|%0, %2}
+   movlps\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_storelps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+	(vec_select:V2SF
+	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+	  (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "@
+   movlps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}
+   movlps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+	(vec_concat:V4SF
+	  (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+	  (vec_select:V2SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+	    (parallel [(const_int 2) (const_int 3)]))))]
+  "TARGET_SSE"
+  "@
+   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+   movlps\t{%2, %0|%0, %2}
+   movlps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	  (match_operand:V4SF 2 "register_operand" "x")
+	  (match_operand:V4SF 1 "register_operand" "0")
+	  (const_int 1)))]
+  "TARGET_SSE"
+  "movss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+(define_insn "*vec_dupv4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_duplicate:V4SF
+	  (match_operand:SF 1 "register_operand" "0")))]
+  "TARGET_SSE"
+  "shufps\t{$0, %0, %0|%0, %0, 0}"
+  [(set_attr "type" "sselog1")
+   (set_attr "mode" "V4SF")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*sse_concatv2sf"
+  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
+	(vec_concat:V2SF
+	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
+  "TARGET_SSE"
+  "@
+   unpcklps\t{%2, %0|%0, %2}
+   movss\t{%1, %0|%0, %1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "mode" "V4SF,SF,DI,DI")])
+
+(define_insn "*sse_concatv4sf"
+  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
+	(vec_concat:V4SF
+	  (match_operand:V2SF 1 "register_operand" " 0,0")
+	  (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+  "TARGET_SSE"
+  "@
+   movlhps\t{%2, %0|%0, %2}
+   movhps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF,V2SF")])
+
+(define_expand "vec_initv4sf"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand 1 "" "")]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_init (false, operands[0], operands[1]);
+  DONE;
+})
+
+(define_insn "vec_setv4sf_0"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y2,m")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
+	  (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+	  (const_int 1)))]
+  "TARGET_SSE"
+  "@
+   movss\t{%2, %0|%0, %2}
+   movss\t{%2, %0|%0, %2}
+   movd\t{%2, %0|%0, %2}
+   #"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+;; A subset is vec_setv4sf.
+(define_insn "*vec_setv4sf_sse4_1"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (match_operand:SF 2 "nonimmediate_operand" "xm"))
+	  (match_operand:V4SF 1 "register_operand" "0")
+	  (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+  "TARGET_SSE4_1"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+  return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+		      (match_operand:V4SF 1 "register_operand" "0")
+		      (match_operand:SI 3 "const_0_to_255_operand" "n")]
+		     UNSPEC_INSERTPS))]
+  "TARGET_SSE4_1"
+  "insertps\t{%3, %2, %0|%0, %2, %3}";
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "V4SF")])
+
+(define_split
+  [(set (match_operand:V4SF 0 "memory_operand" "")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (match_operand:SF 1 "nonmemory_operand" ""))
+	  (match_dup 0)
+	  (const_int 1)))]
+  "TARGET_SSE && reload_completed"
+  [(const_int 0)]
+{
+  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+  DONE;
+})
+
+(define_expand "vec_setv4sf"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:SF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_set (false, operands[0], operands[1],
+			  INTVAL (operands[2]));
+  DONE;
+})
+
+(define_insn_and_split "*vec_extractv4sf_0"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+	(vec_select:SF
+	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+	  (parallel [(const_int 0)])))]
+  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+  if (REG_P (op1))
+    op1 = gen_rtx_REG (SFmode, REGNO (op1));
+  else
+    op1 = gen_lowpart (SFmode, op1);
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_insn "*sse4_1_extractps"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+	(vec_select:SF
+	  (match_operand:V4SF 1 "register_operand" "x")
+	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+  "TARGET_SSE4_1"
+  "extractps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "V4SF")])
+
+(define_insn_and_split "*vec_extract_v4sf_mem"
+  [(set (match_operand:SF 0 "register_operand" "=x*rf")
+       (vec_select:SF
+	 (match_operand:V4SF 1 "memory_operand" "o")
+	 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  int i = INTVAL (operands[2]);
+
+  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
+  DONE;
+})
+
+(define_expand "vec_extractv4sf"
+  [(match_operand:SF 0 "register_operand" "")
+   (match_operand:V4SF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_extract (false, operands[0], operands[1],
+			      INTVAL (operands[2]));
+  DONE;
+})
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
@@ -5847,23 +5417,15 @@
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse_movmskps"
+(define_insn "<sse>_movmskp<ssemodesuffixf2c>"
   [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
-		   UNSPEC_MOVMSK))]
-  "TARGET_SSE"
-  "movmskps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse2_movmskpd"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
-		   UNSPEC_MOVMSK))]
-  "TARGET_SSE2"
-  "movmskpd\t{%1, %0|%0, %1}"
+	(unspec:SI
+	  [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+	  UNSPEC_MOVMSK))]
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "sse2_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -6017,7 +5579,12 @@
   "monitor"
   [(set_attr "length" "3")])
 
-;; SSSE3
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; SSSE3 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
 (define_insn "ssse3_phaddwv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
 	(vec_concat:V8HI
@@ -6536,7 +6103,7 @@
   [(set (match_operand:V16QI 0 "register_operand" "=x")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
 		       (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
-		       UNSPEC_PSHUFB))]
+		      UNSPEC_PSHUFB))]
   "TARGET_SSSE3"
   "pshufb\t{%2, %0|%0, %2}";
   [(set_attr "type" "sselog1")
@@ -6548,7 +6115,7 @@
   [(set (match_operand:V8QI 0 "register_operand" "=y")
 	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
 		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-		      UNSPEC_PSHUFB))]
+		     UNSPEC_PSHUFB))]
   "TARGET_SSSE3"
   "pshufb\t{%2, %0|%0, %2}";
   [(set_attr "type" "sselog1")
@@ -6557,9 +6124,10 @@
 
 (define_insn "ssse3_psign<mode>3"
   [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
-	(unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
-			    (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
-			    UNSPEC_PSIGN))]
+	(unspec:SSEMODE124
+	  [(match_operand:SSEMODE124 1 "register_operand" "0")
+	   (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+	  UNSPEC_PSIGN))]
   "TARGET_SSSE3"
   "psign<ssevecsize>\t{%2, %0|%0, %2}";
   [(set_attr "type" "sselog1")
@@ -6569,9 +6137,10 @@
 
 (define_insn "ssse3_psign<mode>3"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
-	(unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
-			  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
-			  UNSPEC_PSIGN))]
+	(unspec:MMXMODEI
+	  [(match_operand:MMXMODEI 1 "register_operand" "0")
+	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+	  UNSPEC_PSIGN))]
   "TARGET_SSSE3"
   "psign<mmxvecsize>\t{%2, %0|%0, %2}";
   [(set_attr "type" "sselog1")
@@ -6583,7 +6152,7 @@
 	(unspec:TI [(match_operand:TI 1 "register_operand" "0")
 		    (match_operand:TI 2 "nonimmediate_operand" "xm")
 		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
-		    UNSPEC_PALIGNR))]
+		   UNSPEC_PALIGNR))]
   "TARGET_SSSE3"
 {
   operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
@@ -6599,7 +6168,7 @@
 	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
 		    (match_operand:DI 2 "nonimmediate_operand" "ym")
 		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
-		    UNSPEC_PALIGNR))]
+		   UNSPEC_PALIGNR))]
   "TARGET_SSSE3"
 {
   operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
@@ -6634,45 +6203,27 @@
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse4a_vmmovntv2df"
-  [(set (match_operand:DF 0 "memory_operand" "=m")
-        (unspec:DF [(vec_select:DF
-                      (match_operand:V2DF 1 "register_operand" "x")
-                      (parallel [(const_int 0)]))]
-                   UNSPEC_MOVNT))]
-  "TARGET_SSE4A"
-  "movntsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse4a_movntdf"
-  [(set (match_operand:DF 0 "memory_operand" "=m")
-        (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
-                   UNSPEC_MOVNT))]
+(define_insn "sse4a_movnt<mode>"
+  [(set (match_operand:MODEF 0 "memory_operand" "=m")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "x")]
+          UNSPEC_MOVNT))]
   "TARGET_SSE4A"
-  "movntsd\t{%1, %0|%0, %1}"
+  "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse4a_vmmovntv4sf"
-  [(set (match_operand:SF 0 "memory_operand" "=m")
-	(unspec:SF [(vec_select:SF
-	              (match_operand:V4SF 1 "register_operand" "x")
-		      (parallel [(const_int 0)]))]
-		   UNSPEC_MOVNT))]
-  "TARGET_SSE4A"
-  "movntss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse4a_movntsf"
-  [(set (match_operand:SF 0 "memory_operand" "=m")
-	(unspec:SF [(match_operand:SF 1 "register_operand" "x")]
-		   UNSPEC_MOVNT))]
+(define_insn "sse4a_vmmovnt<mode>"
+  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
+	(unspec:<ssescalarmode>
+	  [(vec_select:<ssescalarmode>
+	     (match_operand:SSEMODEF2P 1 "register_operand" "x")
+	     (parallel [(const_int 0)]))]
+	  UNSPEC_MOVNT))]
   "TARGET_SSE4A"
-  "movntss\t{%1, %0|%0, %1}"
+  "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<ssescalarmode>")])
 
 (define_insn "sse4a_extrqi"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
@@ -6727,77 +6278,43 @@
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse4_1_blendpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-	  (match_operand:V2DF 1 "register_operand" "0")
+(define_insn "sse4_1_blendp<ssemodesuffixf2c>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+	  (match_operand:SSEMODEF2P 1 "register_operand" "0")
-	  (match_operand:SI 3 "const_0_to_3_operand" "n")))]
+	  (match_operand:SI 3 "const_0_to_15_operand" "n")))]	; V4SF needs a 4-bit mask, as the old blendps pattern had
   "TARGET_SSE4_1"
-  "blendpd\t{%3, %2, %0|%0, %2, %3}"
+  "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse4_1_blendps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-	  (match_operand:V4SF 1 "register_operand" "0")
-	  (match_operand:SI 3 "const_0_to_15_operand" "n")))]
-  "TARGET_SSE4_1"
-  "blendps\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_blendvpd"
-  [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
-	(unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand"  "0")
-		      (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
-		      (match_operand:V2DF 3 "register_operand" "Yz")]
-		     UNSPEC_BLENDV))]
-  "TARGET_SSE4_1"
-  "blendvpd\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse4_1_blendvps"
-  [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
-		      (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
-		      (match_operand:V4SF 3 "register_operand" "Yz")]
-		     UNSPEC_BLENDV))]
+(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
+  [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
+	   (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
+	   (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
+	  UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
-  "blendvps\t{%3, %2, %0|%0, %2, %3}"
+  "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_dppd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
-		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-		      (match_operand:SI 3 "const_0_to_255_operand" "n")]
-		      UNSPEC_DP))]
-  "TARGET_SSE4_1"
-  "dppd\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "type" "ssemul")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse4_1_dpps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
-		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-		      (match_operand:SI 3 "const_0_to_255_operand" "n")]
-		     UNSPEC_DP))]
+(define_insn "sse4_1_dpp<ssemodesuffixf2c>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
+	  UNSPEC_DP))]
   "TARGET_SSE4_1"
-  "dpps\t{%3, %2, %0|%0, %2, %3}"
+  "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemul")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "sse4_1_movntdqa"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
@@ -7245,55 +6762,32 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse4_1_roundpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
-		      (match_operand:SI 2 "const_0_to_15_operand" "n")]
-		     UNSPEC_ROUND))]
-  "TARGET_ROUND"
-  "roundpd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse4_1_roundps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
-		      (match_operand:SI 2 "const_0_to_15_operand" "n")]
-		     UNSPEC_ROUND))]
-  "TARGET_ROUND"
-  "roundps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse4_1_roundsd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_merge:V2DF
-	  (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
-			(match_operand:SI 3 "const_0_to_15_operand" "n")]
-		       UNSPEC_ROUND)
-	  (match_operand:V2DF 1 "register_operand" "0")
-	  (const_int 1)))]
+(define_insn "sse4_1_roundp<ssemodesuffixf2c>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(unspec:SSEMODEF2P
+	  [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
+	   (match_operand:SI 2 "const_0_to_15_operand" "n")]
+	  UNSPEC_ROUND))]
   "TARGET_ROUND"
-  "roundsd\t{%3, %2, %0|%0, %2, %3}"
+  "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "sse4_1_roundss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF
-	  (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
-			(match_operand:SI 3 "const_0_to_15_operand" "n")]
-		       UNSPEC_ROUND)
-	  (match_operand:V4SF 1 "register_operand" "0")
+(define_insn "sse4_1_rounds<ssemodesuffixf2c>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (unspec:SSEMODEF2P
+	    [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+	     (match_operand:SI 3 "const_0_to_15_operand" "n")]
+	    UNSPEC_ROUND)
+	  (match_operand:SSEMODEF2P 1 "register_operand" "0")
 	  (const_int 1)))]
   "TARGET_ROUND"
-  "roundss\t{%3, %2, %0|%0, %2, %3}"
+  "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "<MODE>")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
@@ -8347,10 +7841,11 @@
 ;; SSE5 permute instructions
 (define_insn "sse5_pperm"
   [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
-	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
-		       (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
-		       (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
-		     UNSPEC_SSE5_PERMUTE))]
+	(unspec:V16QI
+	  [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
+	   (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
+	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
+	  UNSPEC_SSE5_PERMUTE))]
   "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
@@ -8553,7 +8048,7 @@
 	 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
 	 UNSPEC_FRCZ))]
   "TARGET_SSE5"
-  "frcz<ssesuffixf4>\t{%1, %0|%0, %1}"
+  "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "<MODE>")])
@@ -8567,8 +8062,8 @@
 	   UNSPEC_FRCZ)
 	  (match_operand:SSEMODEF2P 1 "register_operand" "0")
 	  (const_int 1)))]
-  "TARGET_ROUND"
-  "frcz<ssesuffixf2s>\t{%2, %0|%0, %2}"
+  "TARGET_SSE5"
+  "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecvt1")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "<MODE>")])
@@ -8710,10 +8205,11 @@
 ;; being added here to be complete.
 (define_insn "sse5_pcom_tf<mode>3"
   [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
-	(unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x")
-			     (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
-			     (match_operand:SI 3 "const_int_operand" "n")]
-			    UNSPEC_SSE5_TRUEFALSE))]
+	(unspec:SSEMODE1248
+	  [(match_operand:SSEMODE1248 1 "register_operand" "x")
+	   (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
+	   (match_operand:SI 3 "const_int_operand" "n")]
+	  UNSPEC_SSE5_TRUEFALSE))]
   "TARGET_SSE5"
 {
   return ((INTVAL (operands[3]) != 0)
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index a6ddb8c967f..b5c66622c2a 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -4312,9 +4312,9 @@
   [(unspec_volatile [(reg:DI 28)] UNSPEC_BLOCKAGE)]
   ""
   ""
-  [(set_attr "type"	"unknown")
-   (set_attr "mode"	"none")
-   (set_attr "length"	"0")])
+  [(set_attr "type" "ghost")
+   (set_attr "mode" "none")
+   (set_attr "length" "0")])
 
 ;; Initialize $gp for RTP PIC.  Operand 0 is the __GOTT_BASE__ symbol
 ;; and operand 1 is the __GOTT_INDEX__ symbol.
@@ -5517,9 +5517,9 @@
   [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)]
   ""
   ""
-  [(set_attr "type"	"unknown")
-   (set_attr "mode"	"none")
-   (set_attr "length"	"0")])
+  [(set_attr "type" "ghost")
+   (set_attr "mode" "none")
+   (set_attr "length" "0")])
 
 (define_expand "epilogue"
   [(const_int 2)]
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index 6c88757d24e..f6c9ee3c2e2 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -432,3 +432,6 @@
 /* When generating kernel code or kexts, we don't use Altivec by
    default, as kernel code doesn't save/restore those registers.  */
 #define OS_MISSING_ALTIVEC (flag_mkernel || flag_apple_kext)
+
+/* Darwin has to rename some of the long double builtins.  */
+#define SUBTARGET_INIT_BUILTINS darwin_patch_builtins ()
diff --git a/gcc/config/rs6000/eabispe.h b/gcc/config/rs6000/eabispe.h
index 0acc17c7022..c3a3f2b4e53 100644
--- a/gcc/config/rs6000/eabispe.h
+++ b/gcc/config/rs6000/eabispe.h
@@ -31,7 +31,7 @@
 #define SUBSUBTARGET_OVERRIDE_OPTIONS \
   if (rs6000_select[1].string == NULL) \
     rs6000_cpu = PROCESSOR_PPC8540; \
-  if (!rs6000_explicit_options.abi) \
+  if (!rs6000_explicit_options.spe_abi) \
     rs6000_spe_abi = 1; \
   if (!rs6000_explicit_options.float_gprs) \
     rs6000_float_gprs = 1; \
diff --git a/gcc/config/rs6000/linuxspe.h b/gcc/config/rs6000/linuxspe.h
index 1aea7442c54..c526cf8dce4 100644
--- a/gcc/config/rs6000/linuxspe.h
+++ b/gcc/config/rs6000/linuxspe.h
@@ -30,7 +30,7 @@
 #define SUBSUBTARGET_OVERRIDE_OPTIONS \
   if (rs6000_select[1].string == NULL) \
     rs6000_cpu = PROCESSOR_PPC8540; \
-  if (!rs6000_explicit_options.abi) \
+  if (!rs6000_explicit_options.spe_abi) \
     rs6000_spe_abi = 1; \
   if (!rs6000_explicit_options.float_gprs) \
     rs6000_float_gprs = 1; \
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4ebea38388b..9b9fefda2af 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -171,7 +171,7 @@ int rs6000_long_double_type_size;
 /* IEEE quad extended precision long double. */
 int rs6000_ieeequad;
 
-/* Whether -mabi=altivec has appeared.  */
+/* Nonzero to use AltiVec ABI.  */
 int rs6000_altivec_abi;
 
 /* Nonzero if we want SPE ABI extensions.  */
@@ -262,12 +262,14 @@ int rs6000_alignment_flags;
 struct {
   bool aix_struct_ret;		/* True if -maix-struct-ret was used.  */
   bool alignment;		/* True if -malign- was used.  */
-  bool abi;			/* True if -mabi=spe/nospe was used.  */
+  bool spe_abi;			/* True if -mabi=spe/no-spe was used.  */
+  bool altivec_abi;		/* True if -mabi=altivec/no-altivec used.  */
   bool spe;			/* True if -mspe= was used.  */
   bool float_gprs;		/* True if -mfloat-gprs= was used.  */
   bool isel;			/* True if -misel was used. */
   bool long_double;	        /* True if -mlong-double- was used.  */
   bool ieee;			/* True if -mabi=ieee/ibmlongdouble used.  */
+  bool vrsave;			/* True if -mvrsave was used.  */
 } rs6000_explicit_options;
 
 struct builtin_description
@@ -667,6 +669,25 @@ struct processor_costs ppc8540_cost = {
   1,			/* prefetch streams /*/
 };
 
+/* Instruction costs on E300C2 and E300C3 cores.  */
+static const
+struct processor_costs ppce300c2c3_cost = {
+  COSTS_N_INSNS (4),    /* mulsi */
+  COSTS_N_INSNS (4),    /* mulsi_const */
+  COSTS_N_INSNS (4),    /* mulsi_const9 */
+  COSTS_N_INSNS (4),    /* muldi */
+  COSTS_N_INSNS (19),   /* divsi */
+  COSTS_N_INSNS (19),   /* divdi */
+  COSTS_N_INSNS (3),    /* fp */
+  COSTS_N_INSNS (4),    /* dmul */
+  COSTS_N_INSNS (18),   /* sdiv */
+  COSTS_N_INSNS (33),   /* ddiv */
+  32,			/* cache line size?  (unlabeled; TODO confirm) */
+  32,			/* l1 cache */
+  256,			/* l2 cache */
+  1,			/* prefetch streams */
+};
+
 /* Instruction costs on POWER4 and POWER5 processors.  */
 static const
 struct processor_costs power4_cost = {
@@ -1418,6 +1439,8 @@ rs6000_override_options (const char *default_cpu)
 	 {"8540", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN},
 	 /* 8548 has a dummy entry for now.  */
 	 {"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN},
+	 {"e300c2", PROCESSOR_PPCE300C2, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+	 {"e300c3", PROCESSOR_PPCE300C3, POWERPC_BASE_MASK},
 	 {"860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
 	 {"970", PROCESSOR_POWER4,
 	  POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
@@ -1524,6 +1547,14 @@ rs6000_override_options (const char *default_cpu)
   if (TARGET_E500)
     rs6000_isel = 1;
 
+  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
+    {	/* The checks below reject AltiVec and SPE for the e300c2/e300c3.  */
+      if (TARGET_ALTIVEC)
+	error ("AltiVec not supported in this target");
+      if (TARGET_SPE)
+	error ("SPE not supported in this target");
+    }
+
   /* If we are optimizing big endian systems for space, use the load/store
      multiple and string instructions.  */
   if (BYTES_BIG_ENDIAN && optimize_size)
@@ -1590,11 +1621,18 @@ rs6000_override_options (const char *default_cpu)
   if (TARGET_XCOFF && TARGET_ALTIVEC)
     rs6000_altivec_abi = 1;
 
-  /* Set Altivec ABI as default for PowerPC64 Linux.  */
-  if (TARGET_ELF && TARGET_64BIT)
+  /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
+     PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
+     be explicitly overridden in either case.  */
+  if (TARGET_ELF)
     {
-      rs6000_altivec_abi = 1;
-      TARGET_ALTIVEC_VRSAVE = 1;
+      if (!rs6000_explicit_options.altivec_abi
+	  && (TARGET_64BIT || TARGET_ALTIVEC))
+	rs6000_altivec_abi = 1;
+
+      /* Enable VRSAVE for AltiVec ABI, unless explicitly overridden.  */
+      if (!rs6000_explicit_options.vrsave)
+	TARGET_ALTIVEC_VRSAVE = rs6000_altivec_abi;
     }
 
   /* Set the Darwin64 ABI as default for 64-bit Darwin.  */
@@ -1638,7 +1676,7 @@ rs6000_override_options (const char *default_cpu)
       /* For the powerpc-eabispe configuration, we set all these by
 	 default, so let's unset them if we manually set another
 	 CPU that is not the E500.  */
-      if (!rs6000_explicit_options.abi)
+      if (!rs6000_explicit_options.spe_abi)
 	rs6000_spe_abi = 0;
       if (!rs6000_explicit_options.spe)
 	rs6000_spe = 0;
@@ -1836,6 +1874,11 @@ rs6000_override_options (const char *default_cpu)
 	rs6000_cost = &ppc8540_cost;
 	break;
 
+      case PROCESSOR_PPCE300C2:
+      case PROCESSOR_PPCE300C3:
+	rs6000_cost = &ppce300c2c3_cost;
+	break;
+
       case PROCESSOR_POWER4:
       case PROCESSOR_POWER5:
 	rs6000_cost = &power4_cost;
@@ -2131,6 +2174,7 @@ rs6000_handle_option (size_t code, const char *arg, int value)
       break;
 
     case OPT_mvrsave_:
+      rs6000_explicit_options.vrsave = true;
       rs6000_parse_yes_no_option ("vrsave", arg, &(TARGET_ALTIVEC_VRSAVE));
       break;
 
@@ -2188,19 +2232,20 @@ rs6000_handle_option (size_t code, const char *arg, int value)
     case OPT_mabi_:
       if (!strcmp (arg, "altivec"))
 	{
-	  rs6000_explicit_options.abi = true;
+	  rs6000_explicit_options.altivec_abi = true;
 	  rs6000_altivec_abi = 1;
+
+	  /* Enabling the AltiVec ABI turns off the SPE ABI.  */
 	  rs6000_spe_abi = 0;
 	}
       else if (! strcmp (arg, "no-altivec"))
 	{
-	  /* ??? Don't set rs6000_explicit_options.abi here, to allow
-	     the default for rs6000_spe_abi to be chosen later.  */
+	  rs6000_explicit_options.altivec_abi = true;
 	  rs6000_altivec_abi = 0;
 	}
       else if (! strcmp (arg, "spe"))
 	{
-	  rs6000_explicit_options.abi = true;
+	  rs6000_explicit_options.spe_abi = true;
 	  rs6000_spe_abi = 1;
 	  rs6000_altivec_abi = 0;
 	  if (!TARGET_SPE_ABI)
@@ -2208,7 +2253,7 @@ rs6000_handle_option (size_t code, const char *arg, int value)
 	}
       else if (! strcmp (arg, "no-spe"))
 	{
-	  rs6000_explicit_options.abi = true;
+	  rs6000_explicit_options.spe_abi = true;
 	  rs6000_spe_abi = 0;
 	}
 
@@ -3619,19 +3664,29 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
       /* We accept [reg + reg] and [reg + OFFSET].  */
 
       if (GET_CODE (x) == PLUS)
-	{
-	  rtx op1 = XEXP (x, 0);
-	  rtx op2 = XEXP (x, 1);
+       {
+         rtx op1 = XEXP (x, 0);
+         rtx op2 = XEXP (x, 1);
+         rtx y;
 
-	  op1 = force_reg (Pmode, op1);
+         op1 = force_reg (Pmode, op1);
 
-	  if (GET_CODE (op2) != REG
-	      && (GET_CODE (op2) != CONST_INT
-		  || !SPE_CONST_OFFSET_OK (INTVAL (op2))))
-	    op2 = force_reg (Pmode, op2);
+         if (GET_CODE (op2) != REG
+             && (GET_CODE (op2) != CONST_INT
+                 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
+                 || (GET_MODE_SIZE (mode) > 8
+                     && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
+           op2 = force_reg (Pmode, op2);
 
-	  return gen_rtx_PLUS (Pmode, op1, op2);
-	}
+         /* We can't always do [reg + reg] for these, because [reg +
+            reg + offset] is not a legitimate addressing mode.  */
+         y = gen_rtx_PLUS (Pmode, op1, op2);
+
+         if (GET_MODE_SIZE (mode) > 8 && REG_P (op2))
+           return force_reg (Pmode, y);
+         else
+           return y;
+       }
 
       return force_reg (Pmode, x);
     }
@@ -9166,6 +9221,10 @@ rs6000_init_builtins (void)
   if (built_in_decls [BUILT_IN_CLOG])
     set_user_assembler_name (built_in_decls [BUILT_IN_CLOG], "__clog");
 #endif
+
+#ifdef SUBTARGET_INIT_BUILTINS
+  SUBTARGET_INIT_BUILTINS;
+#endif
 }
 
 /* Search through a set of builtins and enable the mask bits.
@@ -18502,6 +18561,8 @@ rs6000_issue_rate (void)
   case CPU_PPC7400:
   case CPU_PPC8540:
   case CPU_CELL:
+  case CPU_PPCE300C2:
+  case CPU_PPCE300C3:
     return 2;
   case CPU_RIOS2:
   case CPU_PPC604:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6a64eae3dd9..7f7dd57e1e1 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -60,6 +60,18 @@
 #define TARGET_PAIRED_FLOAT 0
 #endif
 
+#ifdef HAVE_AS_POPCNTB
+#define ASM_CPU_POWER5_SPEC "-mpower5"
+#else
+#define ASM_CPU_POWER5_SPEC "-mpower4"
+#endif
+
+#ifdef HAVE_AS_DFP
+#define ASM_CPU_POWER6_SPEC "-mpower6 -maltivec"
+#else
+#define ASM_CPU_POWER6_SPEC "-mpower4 -maltivec"
+#endif
+
 /* Common ASM definitions used by ASM_SPEC among the various targets
    for handling -mcpu=xxx switches.  */
 #define ASM_CPU_SPEC \
@@ -76,10 +88,10 @@
 %{mcpu=power2: -mpwrx} \
 %{mcpu=power3: -mppc64} \
 %{mcpu=power4: -mpower4} \
-%{mcpu=power5: -mpower4} \
-%{mcpu=power5+: -mpower4} \
-%{mcpu=power6: -mpower4 -maltivec} \
-%{mcpu=power6x: -mpower4 -maltivec} \
+%{mcpu=power5: %(asm_cpu_power5)} \
+%{mcpu=power5+: %(asm_cpu_power5)} \
+%{mcpu=power6: %(asm_cpu_power6) -maltivec} \
+%{mcpu=power6x: %(asm_cpu_power6) -maltivec} \
 %{mcpu=powerpc: -mppc} \
 %{mcpu=rios: -mpwr} \
 %{mcpu=rios1: -mpwr} \
@@ -117,6 +129,8 @@
 %{mcpu=G5: -mpower4 -maltivec} \
 %{mcpu=8540: -me500} \
 %{mcpu=8548: -me500} \
+%{mcpu=e300c2: -me300} \
+%{mcpu=e300c3: -me300} \
 %{maltivec: -maltivec} \
 -many"
 
@@ -141,6 +155,8 @@
   { "asm_cpu",			ASM_CPU_SPEC },				\
   { "asm_default",		ASM_DEFAULT_SPEC },			\
   { "cc1_cpu",			CC1_CPU_SPEC },				\
+  { "asm_cpu_power5",		ASM_CPU_POWER5_SPEC },			\
+  { "asm_cpu_power6",		ASM_CPU_POWER6_SPEC },			\
   SUBTARGET_EXTRA_SPECS
 
 /* -mcpu=native handling only makes sense with compiler running on
@@ -262,6 +278,8 @@ enum processor_type
    PROCESSOR_PPC7400,
    PROCESSOR_PPC7450,
    PROCESSOR_PPC8540,
+   PROCESSOR_PPCE300C2,
+   PROCESSOR_PPCE300C3,
    PROCESSOR_POWER4,
    PROCESSOR_POWER5,
    PROCESSOR_POWER6,
@@ -596,6 +614,7 @@ extern enum rs6000_nop_insertion rs6000_sched_insert_nops;
    Make vector constants quadword aligned.  */
 #define CONSTANT_ALIGNMENT(EXP, ALIGN)                           \
   (TREE_CODE (EXP) == STRING_CST	                         \
+   && (STRICT_ALIGNMENT || !optimize_size)                       \
    && (ALIGN) < BITS_PER_WORD                                    \
    ? BITS_PER_WORD                                               \
    : (ALIGN))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d1b43dc78af..777a1ecf46d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -133,7 +133,7 @@
 ;; Processor type -- this attribute must exactly match the processor_type
 ;; enumeration in rs6000.h.
 
-(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6,cell"
+(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,power4,power5,power6,cell"
   (const (symbol_ref "rs6000_cpu_attr")))
 
 
@@ -166,6 +166,7 @@
 (include "7xx.md")
 (include "7450.md")
 (include "8540.md")
+(include "e300c2c3.md")
 (include "power4.md")
 (include "power5.md")
 (include "power6.md")
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 4088ef73cdf..dccbc1ea350 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -3838,6 +3838,7 @@ find_barrier (int num_mova, rtx mova, rtx from)
   rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
   int si_limit;
   int hi_limit;
+  rtx orig = from;
 
   /* For HImode: range is 510, add 4 because pc counts from address of
      second instruction after this one, subtract 2 for the jump instruction
@@ -3897,6 +3898,7 @@ find_barrier (int num_mova, rtx mova, rtx from)
 
       if (GET_CODE (from) == BARRIER)
 	{
+	  rtx next;
 
 	  found_barrier = from;
 
@@ -3905,6 +3907,14 @@ find_barrier (int num_mova, rtx mova, rtx from)
 	     this kind of barrier.  */
 	  if (barrier_align (from) > 2)
 	    good_barrier = from;
+
+	  /* If we are at the end of a hot/cold block, dump the constants
+	     here.  */
+	  next = NEXT_INSN (from);
+	  if (next
+	      && NOTE_P (next)
+	      && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+	    break;
 	}
 
       if (broken_move (from))
@@ -4061,7 +4071,8 @@ find_barrier (int num_mova, rtx mova, rtx from)
       /* If we exceeded the range, then we must back up over the last
 	 instruction we looked at.  Otherwise, we just need to undo the
 	 NEXT_INSN at the end of the loop.  */
-      if (count_hi > hi_limit || count_si > si_limit)
+      if (PREV_INSN (from) != orig
+	  && (count_hi > hi_limit || count_si > si_limit))
 	from = PREV_INSN (PREV_INSN (from));
       else
 	from = PREV_INSN (from);
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index d7e2727adac..5a580b517cd 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -7378,7 +7378,7 @@ label:
 (define_insn "jump_compact"
   [(set (pc)
 	(label_ref (match_operand 0 "" "")))]
-  "TARGET_SH1"
+  "TARGET_SH1 && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
   "*
 {
   /* The length is 16 if the delay slot is unfilled.  */
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index f1bfdaff607..14854606b8d 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -4455,3 +4455,179 @@ selb\t%0,%4,%0,%3"
 
   DONE;
 }")
+
+(define_expand "vec_unpacku_hi_v8hi"  ; V8HI elements 0-3 -> V4SI, zero-extended
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+        (zero_extend:V4SI 
+          (vec_select:V4HI
+            (match_operand:V8HI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+  ""
+{
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per word: 0x80, 0x80, then bytes 2n, 2n+1.  */
+    0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+    0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_unpacku_lo_v8hi"  ; V8HI elements 4-7 -> V4SI, zero-extended
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+         (zero_extend:V4SI
+          (vec_select:V4HI
+            (match_operand:V8HI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per word: 0x80, 0x80, then two bytes of 8-15.  */
+    0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+    0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+  
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_unpacks_hi_v8hi"  ; V8HI elements 0-3 -> V4SI, sign-extended
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+         (sign_extend:V4SI
+          (vec_select:V4HI
+            (match_operand:V8HI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+  ""
+{
+  rtx tmp1 = gen_reg_rtx (V8HImode);  /* shufb result, input to spu_xshw.  */
+  rtx tmp2 = gen_reg_rtx (V4SImode);  /* spu_xshw result, copied to operand 0.  */
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per word: 0x80, 0x80, then bytes 2n, 2n+1.  */
+    0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+    0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+  emit_insn (gen_spu_xshw (tmp2, tmp1)); /* Presumably halfword->word sign extend; confirm.  */
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_unpacks_lo_v8hi"  ; V8HI elements 4-7 -> V4SI, sign-extended
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+         (sign_extend:V4SI
+          (vec_select:V4HI
+            (match_operand:V8HI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+  rtx tmp1 = gen_reg_rtx (V8HImode);  /* shufb result, input to spu_xshw.  */
+  rtx tmp2 = gen_reg_rtx (V4SImode);  /* spu_xshw result, copied to operand 0.  */
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per word: 0x80, 0x80, then two bytes of 8-15.  */
+    0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+    0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+  emit_insn (gen_spu_xshw (tmp2, tmp1)); /* Presumably halfword->word sign extend; confirm.  */
+  emit_move_insn (operands[0], tmp2);
+
+DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_unpacku_hi_v16qi"  ; V16QI elements 0-7 -> V8HI, zero-extended
+  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+        (zero_extend:V8HI
+          (vec_select:V8QI
+            (match_operand:V16QI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+                       (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+  ""
+{
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per halfword: 0x80, then one of bytes 0-7.  */
+    0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+    0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_unpacku_lo_v16qi"  ; V16QI elements 8-15 -> V8HI, zero-extended
+  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+          (zero_extend:V8HI
+          (vec_select:V8QI
+            (match_operand:V16QI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+                       (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+""
+{
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per halfword: 0x80, then one of bytes 8-15.  */
+    0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+    0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_unpacks_hi_v16qi"  ; V16QI elements 0-7 -> V8HI, sign-extended
+  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+         (sign_extend:V8HI
+          (vec_select:V8QI
+            (match_operand:V16QI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+                       (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+  rtx tmp1 = gen_reg_rtx (V16QImode);  /* shufb result, input to spu_xsbh.  */
+  rtx tmp2 = gen_reg_rtx (V8HImode);  /* spu_xsbh result, copied to operand 0.  */
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per halfword: 0x80, then one of bytes 0-7.  */
+    0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+    0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+  emit_insn (gen_spu_xsbh (tmp2, tmp1));  /* Presumably byte->halfword sign extend; confirm.  */
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_unpacks_lo_v16qi"  ; V16QI elements 8-15 -> V8HI, sign-extended
+  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+         (sign_extend:V8HI
+          (vec_select:V8QI
+            (match_operand:V16QI 1 "spu_reg_operand" "r")
+            (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+                       (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+""
+{
+  rtx tmp1 = gen_reg_rtx (V16QImode);  /* shufb result, input to spu_xsbh.  */
+  rtx tmp2 = gen_reg_rtx (V8HImode);  /* spu_xsbh result, copied to operand 0.  */
+  rtx mask = gen_reg_rtx (TImode);  /* Byte pattern handed to shufb below.  */
+  unsigned char arr[16] = {  /* Per halfword: 0x80, then one of bytes 8-15.  */
+    0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+    0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+  /* NOTE(review): 0x80 presumably yields a zero byte; confirm in the SPU ISA.  */
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+  emit_insn (gen_spu_xsbh (tmp2, tmp1));  /* Presumably byte->halfword sign extend; confirm.  */
+  emit_move_insn (operands[0], tmp2);
+
+DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+
+
+
author	bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>	2008-02-26 13:09:58 +0000
committer	bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>	2008-02-26 13:09:58 +0000
commit	b8053af55de78a3f080783e5113fd6452e5a43c5 (patch)
tree	a5906142e844e296abb7382e34657faf4e58f74f /gcc/config
parent	4896274c9597b09d4c61bdd2efb3201a72634b3c (diff)
download	gcc-b8053af55de78a3f080783e5113fd6452e5a43c5.tar.gz