31 files changed, 907 insertions, 539 deletions
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index 5c96cfd53cd..cd6a815a483 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -1,6 +1,6 @@
 /* Functions for generic Darwin as target machine for GNU C compiler.
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 2000, 2001, 2002, 2003, 2004,
-   2005, 2006, 2007, 2008, 2009, 2010
+   2005, 2006, 2007, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Apple Computer Inc.
 
@@ -160,6 +160,11 @@ output_objc_section_asm_op (const void *directive)
   output_section_asm_op (directive);
 }
 
+
+/* Private flag applied to disable section-anchors in a particular section.  */
+#define SECTION_NO_ANCHOR SECTION_MACH_DEP
+
+
 /* Implement TARGET_ASM_INIT_SECTIONS.  */
 
 void
@@ -177,10 +182,6 @@ darwin_init_sections (void)
   readonly_data_section = darwin_sections[const_section];
   exception_section = darwin_sections[darwin_exception_section];
   eh_frame_section = darwin_sections[darwin_eh_frame_section];
-
-  /* Make sure that there is no conflict between the 'no anchor' section
-     flag declared in darwin.h and the section flags declared in output.h.  */
-  gcc_assert (SECTION_NO_ANCHOR > SECTION_MACH_DEP);
 }
 
 int
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 74ced9c31ca..778ff1e3023 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -1,6 +1,6 @@
 /* Target definitions for Darwin (Mac OS X) systems.
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 2000, 2001, 2002, 2003, 2004,
-   2005, 2006, 2007, 2008, 2009, 2010
+   2005, 2006, 2007, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Apple Computer Inc.
 
@@ -651,11 +651,6 @@ int darwin_label_is_anonymous_local_objc_name (const char *name);
 /* The generic version, archs should over-ride where required.  */
 #define MACHOPIC_NL_SYMBOL_PTR_SECTION ".non_lazy_symbol_pointer"
 
-/* Private flag applied to disable section-anchors in a particular section.
-   This needs to be kept in sync with the flags used by varasm.c (defined in
-   output.h).  */
-#define SECTION_NO_ANCHOR 0x2000000
-
 /* Declare the section variables.  */
 #ifndef USED_FOR_TARGET
 enum darwin_section_enum {
diff --git a/gcc/config/hpux11.opt b/gcc/config/hpux11.opt
index 7b572a44196..507d9b890bd 100644
--- a/gcc/config/hpux11.opt
+++ b/gcc/config/hpux11.opt
@@ -27,4 +27,7 @@
 mt
 Driver RejectNegative
 
+pthread
+Driver
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/i386/djgpp.opt b/gcc/config/i386/djgpp.opt
index 013bdf0bf51..7e4affca944 100644
--- a/gcc/config/i386/djgpp.opt
+++ b/gcc/config/i386/djgpp.opt
@@ -1,6 +1,6 @@
 ; DJGPP-specific options.
 
-; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc.
 ;
 ; This file is part of GCC.
 ;
@@ -23,3 +23,6 @@
 mbnu210
 Target Var(TARGET_BNU210)
 Ignored (obsolete)
+
+posix
+Driver
diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h
index 411e02dc8d5..baddbb0b929 100644
--- a/gcc/config/i386/sol2.h
+++ b/gcc/config/i386/sol2.h
@@ -1,6 +1,6 @@
 /* Target definitions for GCC for Intel 80386 running Solaris 2
    Copyright (C) 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+   2004, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
    Contributed by Fred Fish (fnf@cygnus.com).
 
 This file is part of GCC.
@@ -137,9 +137,6 @@ along with GCC; see the file COPYING3.  If not see
 /* Register the Solaris-specific #pragma directives.  */
 #define REGISTER_SUBTARGET_PRAGMAS() solaris_register_pragmas ()
 
-/* Undo i386/sysv4.h version.  */
-#undef SUBTARGET_RETURN_IN_MEMORY
-
 /* Augment i386/unix.h version to return 8-byte vectors in memory, matching
    Sun Studio compilers until version 12, the only ones supported on
    Solaris 8 and 9.  */
diff --git a/gcc/config/i386/sysv4.h b/gcc/config/i386/sysv4.h
index 63c0cbc85a7..64026e72b66 100644
--- a/gcc/config/i386/sysv4.h
+++ b/gcc/config/i386/sysv4.h
@@ -1,5 +1,6 @@
 /* Target definitions for GCC for Intel 80386 running System V.4
-   Copyright (C) 1991, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 1991, 2001, 2002, 2007, 2008, 2011
+   Free Software Foundation, Inc.
 
    Written by Ron Guilmette (rfg@netcom.com).
 
@@ -19,16 +20,6 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
-
-#define TARGET_VERSION fprintf (stderr, " (i386 System V Release 4)");
-
-/* The svr4 ABI for the i386 says that records and unions are returned
-   in memory.  */
-
-#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
-	(TYPE_MODE (TYPE) == BLKmode \
-	 || (VECTOR_MODE_P (TYPE_MODE (TYPE)) && int_size_in_bytes (TYPE) == 8));
-
 /* Output at beginning of assembler file.  */
 /* The .file command should always begin the output.  */
 
@@ -39,56 +30,6 @@ along with GCC; see the file COPYING3.  If not see
 #undef DBX_REGISTER_NUMBER
 #define DBX_REGISTER_NUMBER(n)  svr4_dbx_register_map[n]
 
-/* The routine used to output sequences of byte values.  We use a special
-   version of this for most svr4 targets because doing so makes the
-   generated assembly code more compact (and thus faster to assemble)
-   as well as more readable.  Note that if we find subparts of the
-   character sequence which end with NUL (and which are shorter than
-   STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING.  */
-
-#undef ASM_OUTPUT_ASCII
-#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH)				\
-  do									\
-    {									\
-      const unsigned char *_ascii_bytes =				\
-        (const unsigned char *) (STR);					\
-      const unsigned char *limit = _ascii_bytes + (LENGTH);		\
-      unsigned bytes_in_chunk = 0;					\
-      for (; _ascii_bytes < limit; _ascii_bytes++)			\
-	{								\
-	  const unsigned char *p;					\
-	  if (bytes_in_chunk >= 64)					\
-	    {								\
-	      fputc ('\n', (FILE));					\
-	      bytes_in_chunk = 0;					\
-	    }								\
-	  for (p = _ascii_bytes; p < limit && *p != '\0'; p++)		\
-	    continue;							\
-	  if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT)	\
-	    {								\
-	      if (bytes_in_chunk > 0)					\
-		{							\
-		  fputc ('\n', (FILE));					\
-		  bytes_in_chunk = 0;					\
-		}							\
-	      ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes);		\
-	      _ascii_bytes = p;						\
-	    }								\
-	  else								\
-	    {								\
-	      if (bytes_in_chunk == 0)					\
-		fputs (ASM_BYTE, (FILE));				\
-	      else							\
-		fputc (',', (FILE));					\
-	      fprintf ((FILE), "0x%02x", *_ascii_bytes);		\
-	      bytes_in_chunk += 5;					\
-	    }								\
-	}								\
-      if (bytes_in_chunk > 0)						\
-	fputc ('\n', (FILE));						\
-    }									\
-  while (0)
-
 /* A C statement (sans semicolon) to output to the stdio stream
    FILE the assembler definition of uninitialized global DECL named
    NAME whose size is SIZE bytes and alignment is ALIGN bytes.
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index 87e04a23d76..107a7ccb983 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -39,9 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
 extern void ia64_expand_compare (rtx *, rtx *, rtx *);
 extern void ia64_expand_vecint_cmov (rtx[]);
 extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
+extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
 extern void ia64_expand_unpack (rtx [], bool, bool);
 extern void ia64_expand_widen_sum (rtx[], bool);
-extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool);
 extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
 extern void ia64_expand_call (rtx, rtx, rtx, int);
 extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 1842555502b..1d3f8b18d34 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -1972,12 +1972,13 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
   return true;
 }
 
-/* Emit an integral vector unpack operation.  */
+/* The vectors LO and HI each contain N halves of a double-wide vector.
+   Reassemble either the first N/2 or the second N/2 elements.  */
 
 void
-ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
+ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
 {
-  enum machine_mode mode = GET_MODE (operands[1]);
+  enum machine_mode mode = GET_MODE (lo);
   rtx (*gen) (rtx, rtx, rtx);
   rtx x;
 
@@ -1993,110 +1994,66 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
       gcc_unreachable ();
     }
 
-  /* Fill in x with the sign extension of each element in op1.  */
-  if (unsignedp)
-    x = CONST0_RTX (mode);
-  else
-    {
-      bool neg;
-
-      x = gen_reg_rtx (mode);
-
-      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
-					CONST0_RTX (mode));
-      gcc_assert (!neg);
-    }
-
+  x = gen_lowpart (mode, out);
   if (TARGET_BIG_ENDIAN)
-    emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1]));
+    x = gen (x, hi, lo);
   else
-    emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
+    x = gen (x, lo, hi);
+  emit_insn (x);
 }
 
-/* Emit an integral vector widening sum operations.  */
+/* Return a vector of the sign-extension of VEC.  */
 
-void
-ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
+static rtx
+ia64_unpack_sign (rtx vec, bool unsignedp)
 {
-  rtx l, h, x, s;
-  enum machine_mode wmode, mode;
-  rtx (*unpack_l) (rtx, rtx, rtx);
-  rtx (*unpack_h) (rtx, rtx, rtx);
-  rtx (*plus) (rtx, rtx, rtx);
-
-  wmode = GET_MODE (operands[0]);
-  mode = GET_MODE (operands[1]);
+  enum machine_mode mode = GET_MODE (vec);
+  rtx zero = CONST0_RTX (mode);
 
-  switch (mode)
-    {
-    case V8QImode:
-      unpack_l = gen_vec_interleave_lowv8qi;
-      unpack_h = gen_vec_interleave_highv8qi;
-      plus = gen_addv4hi3;
-      break;
-    case V4HImode:
-      unpack_l = gen_vec_interleave_lowv4hi;
-      unpack_h = gen_vec_interleave_highv4hi;
-      plus = gen_addv2si3;
-      break;
-    default:
-      gcc_unreachable ();
-    }
-
-  /* Fill in x with the sign extension of each element in op1.  */
   if (unsignedp)
-    x = CONST0_RTX (mode);
+    return zero;
   else
     {
+      rtx sign = gen_reg_rtx (mode);
       bool neg;
 
-      x = gen_reg_rtx (mode);
-
-      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
-					CONST0_RTX (mode));
+      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
       gcc_assert (!neg);
+
+      return sign;
     }
+}
 
-  l = gen_reg_rtx (wmode);
-  h = gen_reg_rtx (wmode);
-  s = gen_reg_rtx (wmode);
+/* Emit an integral vector unpack operation.  */
 
-  if (TARGET_BIG_ENDIAN)
-    {
-      emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1]));
-      emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1]));
-    }
-  else
-    {
-      emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
-      emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
-    }
-  emit_insn (plus (s, l, operands[2]));
-  emit_insn (plus (operands[0], h, s));
+void
+ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
+{
+  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
+  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
 }
 
+/* Emit an integral vector widening sum operations.  */
+
 void
-ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
+ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
 {
-  rtx l = gen_reg_rtx (V4HImode);
-  rtx h = gen_reg_rtx (V4HImode);
-  rtx (*mulhigh)(rtx, rtx, rtx, rtx);
-  rtx (*interl)(rtx, rtx, rtx);
+  enum machine_mode wmode;
+  rtx l, h, t, sign;
 
-  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  sign = ia64_unpack_sign (operands[1], unsignedp);
+
+  wmode = GET_MODE (operands[0]);
+  l = gen_reg_rtx (wmode);
+  h = gen_reg_rtx (wmode);
 
-  /* For signed, pmpy2.r would appear to more closely match this operation.
-     However, the vectorizer is more likely to use the LO and HI patterns
-     in pairs. At which point, with this formulation, the first two insns
-     of each can be CSEd.  */
-  mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2;
-  emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (l, operands[1], sign, false);
+  ia64_unpack_assemble (h, operands[1], sign, true);
 
-  interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l));
-  else
-    emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
+  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
+  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
+  if (t != operands[0])
+    emit_move_insn (operands[0], t);
 }
 
 /* Emit a signed or unsigned V8QI dot product operation.  */
@@ -2104,62 +2061,31 @@ ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
 void
 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
 {
-  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
+  rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
+  rtx p1, p2, p3, p4, s1, s2, s3;
 
-  /* Fill in x1 and x2 with the sign extension of each element.  */
-  if (unsignedp)
-    x1 = x2 = CONST0_RTX (V8QImode);
-  else
-    {
-      bool neg;
-
-      x1 = gen_reg_rtx (V8QImode);
-      x2 = gen_reg_rtx (V8QImode);
-
-      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
-					CONST0_RTX (V8QImode));
-      gcc_assert (!neg);
-      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
-					CONST0_RTX (V8QImode));
-      gcc_assert (!neg);
-    }
+  op1 = operands[1];
+  op2 = operands[2];
+  sn1 = ia64_unpack_sign (op1, unsignedp);
+  sn2 = ia64_unpack_sign (op2, unsignedp);
 
   l1 = gen_reg_rtx (V4HImode);
   l2 = gen_reg_rtx (V4HImode);
   h1 = gen_reg_rtx (V4HImode);
   h2 = gen_reg_rtx (V4HImode);
-
-  if (TARGET_BIG_ENDIAN)
-    {
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l1), x1, operands[1]));
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l2), x2, operands[2]));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h1), x1, operands[1]));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h2), x2, operands[2]));
-    }
-  else
-    {
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l1), operands[1], x1));
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l2), operands[2], x2));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h1), operands[1], x1));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h2), operands[2], x2));
-    }
+  ia64_unpack_assemble (l1, op1, sn1, false);
+  ia64_unpack_assemble (l2, op2, sn2, false);
+  ia64_unpack_assemble (h1, op1, sn1, true);
+  ia64_unpack_assemble (h2, op2, sn2, true);
 
   p1 = gen_reg_rtx (V2SImode);
   p2 = gen_reg_rtx (V2SImode);
   p3 = gen_reg_rtx (V2SImode);
   p4 = gen_reg_rtx (V2SImode);
-  emit_insn (gen_pmpy2_r (p1, l1, l2));
-  emit_insn (gen_pmpy2_l (p2, l1, l2));
-  emit_insn (gen_pmpy2_r (p3, h1, h2));
-  emit_insn (gen_pmpy2_l (p4, h1, h2));
+  emit_insn (gen_pmpy2_even (p1, l1, l2));
+  emit_insn (gen_pmpy2_even (p2, h1, h2));
+  emit_insn (gen_pmpy2_odd (p3, l1, l2));
+  emit_insn (gen_pmpy2_odd (p4, h1, h2));
 
   s1 = gen_reg_rtx (V2SImode);
   s2 = gen_reg_rtx (V2SImode);
diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md
index e06c521a056..6622b2001f4 100644
--- a/gcc/config/ia64/predicates.md
+++ b/gcc/config/ia64/predicates.md
@@ -624,3 +624,7 @@
   return REG_P (op) && REG_POINTER (op);
 })
 
+;; True if this is the right-most vector element; for mux1 @brcst.
+(define_predicate "mux1_brcst_element"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)")))
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index 4e5977c05a2..ad367214d9d 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -172,35 +172,14 @@
 		   (match_operand:V8QI 2 "gr_register_operand" "r")))]
   ""
 {
-  rtx r1, l1, r2, l2, rm, lm;
-
-  r1 = gen_reg_rtx (V4HImode);
-  l1 = gen_reg_rtx (V4HImode);
-  r2 = gen_reg_rtx (V4HImode);
-  l2 = gen_reg_rtx (V4HImode);
-
-  /* Zero-extend the QImode elements into two words of HImode elements
-     by interleaving them with zero bytes.  */
-  emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
-                         operands[1], CONST0_RTX (V8QImode)));
-  emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
-                         operands[2], CONST0_RTX (V8QImode)));
-  emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
-                         operands[1], CONST0_RTX (V8QImode)));
-  emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
-                         operands[2], CONST0_RTX (V8QImode)));
-
-  /* Multiply.  */
-  rm = gen_reg_rtx (V4HImode);
-  lm = gen_reg_rtx (V4HImode);
-  emit_insn (gen_mulv4hi3 (rm, r1, r2));
-  emit_insn (gen_mulv4hi3 (lm, l1, l2));
-
-  /* Zap the high order bytes of the HImode elements by overwriting those
-     in one part with the low order bytes of the other.  */
-  emit_insn (gen_mix1_r (operands[0],
-                         gen_lowpart (V8QImode, rm),
-                         gen_lowpart (V8QImode, lm)));
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_vec_widen_umult_lo_v8qi (l, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_hi_v8qi (h, operands[1], operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_vec_pack_trunc_v4hi (operands[0], h, l));
+  else
+    emit_insn (gen_vec_pack_trunc_v4hi (operands[0], l, h));
   DONE;
 })
 
@@ -296,7 +275,7 @@
   "pmpyshr2.u %0 = %1, %2, %3"
   [(set_attr "itanium_class" "mmmul")])
 
-(define_insn "pmpy2_r"
+(define_insn "pmpy2_even"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(mult:V2SI
 	  (vec_select:V2SI
@@ -308,10 +287,16 @@
 	      (match_operand:V4HI 2 "gr_register_operand" "r"))
 	    (parallel [(const_int 0) (const_int 2)]))))]
   ""
-  "pmpy2.r %0 = %1, %2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pmpy2.l %0 = %1, %2";
+  else
+    return "%,pmpy2.r %0 = %1, %2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "pmpy2_l"
+(define_insn "pmpy2_odd"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(mult:V2SI
 	  (vec_select:V2SI
@@ -323,7 +308,13 @@
 	      (match_operand:V4HI 2 "gr_register_operand" "r"))
 	    (parallel [(const_int 1) (const_int 3)]))))]
   ""
-  "pmpy2.l %0 = %1, %2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pmpy2.r %0 = %1, %2";
+  else
+    return "%,pmpy2.l %0 = %1, %2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_expand "vec_widen_smult_lo_v4hi"
@@ -332,7 +323,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, false, false);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, false);
   DONE;
 })
 
@@ -342,7 +337,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, false, true);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, true);
   DONE;
 })
 
@@ -352,7 +351,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, true, false);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, false);
   DONE;
 })
 
@@ -362,7 +365,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, true, true);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, true);
   DONE;
 })
 
@@ -390,12 +397,8 @@
      of the full 32-bit product.  */
 
   /* T0 = CDBA.  */
-  if (TARGET_BIG_ENDIAN)
-    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, GEN_INT (3), const2_rtx,
-					       const1_rtx, const0_rtx));
-  else
-    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
-					       GEN_INT (3), const2_rtx));
+  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
+					     GEN_INT (3), const2_rtx));
   x = gen_rtx_VEC_SELECT (V4HImode, op1h, x);
   emit_insn (gen_rtx_SET (VOIDmode, t0, x));
 
@@ -409,15 +412,28 @@
   emit_insn (gen_mulv4hi3 (t3, t0, op2h));
 
   /* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW.  */
-  emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t4), t1, t2));
+  x = gen_lowpart (V4HImode, t4);
+  if (TARGET_BIG_ENDIAN)
+    x = gen_mix2_odd (x, t2, t1);
+  else
+    x = gen_mix2_even (x, t1, t2);
+  emit_insn (x);
 
   /* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16.  */
-  emit_insn (gen_mix2_l (gen_lowpart (V4HImode, t5),
-			 CONST0_RTX (V4HImode), t3));
+  x = gen_lowpart (V4HImode, t5);
+  if (TARGET_BIG_ENDIAN)
+    x = gen_mix2_even (x, t3, CONST0_RTX (V4HImode));
+  else
+    x = gen_mix2_odd (x, CONST0_RTX (V4HImode), t3);
+  emit_insn (x);
 
   /* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16.  */
-  emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t6),
-			 CONST0_RTX (V4HImode), t3));
+  x = gen_lowpart (V4HImode, t6);
+  if (TARGET_BIG_ENDIAN)
+    x = gen_mix2_odd (x, t3, CONST0_RTX (V4HImode));
+  else
+    x = gen_mix2_even (x, CONST0_RTX (V4HImode), t3);
+  emit_insn (x);
 
   emit_insn (gen_addv2si3 (t7, t4, t5));
   emit_insn (gen_addv2si3 (operands[0], t6, t7));
@@ -612,16 +628,36 @@
    (match_operand:V2SI 3 "gr_register_operand" "")]
   ""
 {
-  rtx l, r, t;
+  rtx e, o, t;
+
+  e = gen_reg_rtx (V2SImode);
+  o = gen_reg_rtx (V2SImode);
+  t = gen_reg_rtx (V2SImode);
+
+  emit_insn (gen_pmpy2_even (e, operands[1], operands[2]));
+  emit_insn (gen_pmpy2_odd (o, operands[1], operands[2]));
+  emit_insn (gen_addv2si3 (t, e, operands[3]));
+  emit_insn (gen_addv2si3 (operands[0], t, o));
+  DONE;
+})
+
+(define_expand "udot_prodv4hi"
+  [(match_operand:V2SI 0 "gr_register_operand" "")
+   (match_operand:V4HI 1 "gr_register_operand" "")
+   (match_operand:V4HI 2 "gr_register_operand" "")
+   (match_operand:V2SI 3 "gr_register_operand" "")]
+  ""
+{
+  rtx l, h, t;
 
-  r = gen_reg_rtx (V2SImode);
   l = gen_reg_rtx (V2SImode);
+  h = gen_reg_rtx (V2SImode);
   t = gen_reg_rtx (V2SImode);
 
-  emit_insn (gen_pmpy2_r (r, operands[1], operands[2]));
-  emit_insn (gen_pmpy2_l (l, operands[1], operands[2]));
-  emit_insn (gen_addv2si3 (t, r, operands[3]));
-  emit_insn (gen_addv2si3 (operands[0], t, l));
+  emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2]));
+  emit_insn (gen_addv2si3 (t, l, operands[3]));
+  emit_insn (gen_addv2si3 (operands[0], t, h));
   DONE;
 })
 
@@ -677,7 +713,13 @@
 	  (ss_truncate:V4QI
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
   ""
-  "pack2.sss %0 = %r1, %r2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pack2.sss %0 = %r2, %r1";
+  else
+    return "%,pack2.sss %0 = %r1, %r2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_pack_usat_v4hi"
@@ -688,7 +730,13 @@
 	  (us_truncate:V4QI
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
   ""
-  "pack2.uss %0 = %r1, %r2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pack2.uss %0 = %r2, %r1";
+  else
+    return "%,pack2.uss %0 = %r1, %r2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_pack_ssat_v2si"
@@ -699,7 +747,13 @@
 	  (ss_truncate:V2HI
 	    (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))]
   ""
-  "pack4.sss %0 = %r1, %r2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pack4.sss %0 = %r2, %r1";
+  else
+    return "%,pack4.sss %0 = %r1, %r2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_interleave_lowv8qi"
@@ -742,54 +796,54 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix1_r"
+(define_insn "mix1_even"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
 	    (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 8)
-		     (const_int 2)
-		     (const_int 10)
-		     (const_int 4)
-		     (const_int 12)
-		     (const_int 6)
-		     (const_int 14)])))]
-  ""
-  "mix1.r %0 = %r2, %r1"
+	  (parallel [(const_int 0) (const_int 8)
+		     (const_int 2) (const_int 10)
+		     (const_int 4) (const_int 12)
+		     (const_int 6) (const_int 14)])))]
+  ""
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix1.l %0 = %r1, %r2";
+  else
+    return "%,mix1.r %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix1_l"
+(define_insn "mix1_odd"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
 	    (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 1)
-		     (const_int 9)
-		     (const_int 3)
-		     (const_int 11)
-		     (const_int 5)
-		     (const_int 13)
-		     (const_int 7)
-		     (const_int 15)])))]
-  ""
-  "mix1.l %0 = %r2, %r1"
+	  (parallel [(const_int 1) (const_int 9)
+		     (const_int 3) (const_int 11)
+		     (const_int 5) (const_int 13)
+		     (const_int 7) (const_int 15)])))]
+  ""
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix1.r %0 = %r1, %r2";
+  else
+    return "%,mix1.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "*mux1_rev"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 7)
-		     (const_int 6)
-		     (const_int 5)
-		     (const_int 4)
-		     (const_int 3)
-		     (const_int 2)
-		     (const_int 1)
-		     (const_int 0)])))]
+	  (parallel [(const_int 7) (const_int 6)
+		     (const_int 5) (const_int 4)
+		     (const_int 3) (const_int 2)
+		     (const_int 1) (const_int 0)])))]
   ""
   "mux1 %0 = %1, @rev"
   [(set_attr "itanium_class" "mmshf")])
@@ -798,14 +852,10 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 2)
-		     (const_int 6)
-		     (const_int 1)
-		     (const_int 5)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 2) (const_int 6)
+		     (const_int 1) (const_int 5)
+		     (const_int 3) (const_int 7)])))]
   ""
   "mux1 %0 = %1, @mix"
   [(set_attr "itanium_class" "mmshf")])
@@ -814,14 +864,10 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 1)
-		     (const_int 5)
-		     (const_int 2)
-		     (const_int 6)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 1) (const_int 5)
+		     (const_int 2) (const_int 6)
+		     (const_int 3) (const_int 7)])))]
   ""
   "mux1 %0 = %1, @shuf"
   [(set_attr "itanium_class" "mmshf")])
@@ -830,14 +876,10 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 2)
-		     (const_int 4)
-		     (const_int 6)
-		     (const_int 1)
-		     (const_int 3)
-		     (const_int 5)
-		     (const_int 7)])))]
+	  (parallel [(const_int 0) (const_int 2)
+		     (const_int 4) (const_int 6)
+		     (const_int 1) (const_int 3)
+		     (const_int 5) (const_int 7)])))]
   ""
   "mux1 %0 = %1, @alt"
   [(set_attr "itanium_class" "mmshf")])
@@ -846,14 +888,14 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)])))]
+	  (parallel [(match_operand 2 "mux1_brcst_element" "")
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)])))]
   ""
   "mux1 %0 = %1, @brcst"
   [(set_attr "itanium_class" "mmshf")])
@@ -873,10 +915,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V8QImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix1_l (temp, operands[2], operands[1]));
-  else
-    emit_insn (gen_mix1_r (temp, operands[1], operands[2]));
+  emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
   emit_insn (gen_mux1_alt (operands[0], temp));
   DONE;
 })
@@ -888,10 +927,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V8QImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix1_r (temp, operands[2], operands[1]));
-  else
-    emit_insn (gen_mix1_l (temp, operands[1], operands[2]));
+  emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
   emit_insn (gen_mux1_alt (operands[0], temp));
   DONE;
 })
@@ -902,10 +938,8 @@
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 1)
-		     (const_int 5)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 1) (const_int 5)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -922,10 +956,8 @@
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 2)
-		     (const_int 6)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 2) (const_int 6)
+		     (const_int 3) (const_int 7)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -936,32 +968,40 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix2_r"
+(define_insn "mix2_even"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 2)
-		     (const_int 6)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 2) (const_int 6)])))]
   ""
-  "mix2.r %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix2.l %0 = %r1, %r2";
+  else
+    return "%,mix2.r %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix2_l"
+(define_insn "mix2_odd"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 1)
-		     (const_int 5)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 1) (const_int 5)
+		     (const_int 3) (const_int 7)])))]
   ""
-  "mix2.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix2.r %0 = %r1, %r2";
+  else
+    return "%,mix2.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "*mux2"
@@ -974,17 +1014,17 @@
 		     (match_operand 5 "const_int_2bit_operand" "")])))]
   ""
 {
-  int mask;
+  int mask = 0;
   if (TARGET_BIG_ENDIAN)
     {
-      mask  = INTVAL (operands[2]) << 4;
-      mask |= INTVAL (operands[3]) << 6;
-      mask |= INTVAL (operands[4]);
-      mask |= INTVAL (operands[5]) << 2;
+      mask |= (3 - INTVAL (operands[2])) << 6;
+      mask |= (3 - INTVAL (operands[3])) << 4;
+      mask |= (3 - INTVAL (operands[4])) << 2;
+      mask |= 3 - INTVAL (operands[5]);
     }
   else
     {
-      mask  = INTVAL (operands[2]);
+      mask |= INTVAL (operands[2]);
       mask |= INTVAL (operands[3]) << 2;
       mask |= INTVAL (operands[4]) << 4;
       mask |= INTVAL (operands[5]) << 6;
@@ -998,10 +1038,8 @@
   [(set (match_operand:V4HI 0 "gr_register_operand" "")
 	(vec_select:V4HI
 	  (match_operand:V4HI 1 "gr_register_operand" "")
-	  (parallel [(const_int 0)
-		     (const_int 2)
-		     (const_int 1)
-		     (const_int 3)])))]
+	  (parallel [(const_int 0) (const_int 2)
+		     (const_int 1) (const_int 3)])))]
   "")
 
 (define_expand "vec_extract_evenv4hi"
@@ -1011,10 +1049,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V4HImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
-  else
-    emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
+  emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
   emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
   DONE;
 })
@@ -1026,10 +1061,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V4HImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
-  else
-    emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
+  emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
   emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
   DONE;
 })
@@ -1042,15 +1074,13 @@
   "mux2 %0 = %1, 0"
   [(set_attr "itanium_class" "mmshf")])
 
-;; Note that mix4.r performs the exact same operation.
 (define_insn "vec_interleave_lowv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
 	    (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 2)])))]
+	  (parallel [(const_int 0) (const_int 2)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -1061,15 +1091,13 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-;; Note that mix4.l performs the exact same operation.
 (define_insn "vec_interleave_highv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
 	    (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 1)
-		     (const_int 3)])))]
+	  (parallel [(const_int 1) (const_int 3)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -1088,7 +1116,7 @@
 {
   if (TARGET_BIG_ENDIAN)
     emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
-					   operands[2]));
+					    operands[2]));
   else
     emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
 					   operands[2]));
@@ -1103,7 +1131,7 @@
 {
   if (TARGET_BIG_ENDIAN)
     emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
-					    operands[2]));
+					   operands[2]));
   else
     emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
 					    operands[2]));
@@ -1131,10 +1159,7 @@
   if (!gr_reg_or_0_operand (op2, SImode))
     op2 = force_reg (SImode, op2);
 
-  if (TARGET_BIG_ENDIAN)
-    x = gen_rtx_VEC_CONCAT (V2SImode, op2, op1);
-  else
-    x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
+  x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
   emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
   DONE;
 })
@@ -1145,7 +1170,13 @@
 	  (match_operand:SI 1 "gr_reg_or_0_operand" "rO")
 	  (match_operand:SI 2 "gr_reg_or_0_operand" "rO")))]
   ""
-  "unpack4.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack4.l %0 = %r1, %r2";
+  else
+    return "%,unpack4.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 ;; Missing operations
@@ -1315,7 +1346,10 @@
   ""
 {
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+  else
+    emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
   emit_insn (gen_addv2sf3 (operands[0], operands[1], tmp));
   DONE;
 })
@@ -1326,7 +1360,10 @@
   ""
 {
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+  else
+    emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
   emit_insn (gen_smaxv2sf3 (operands[0], operands[1], tmp));
   DONE;
 })
@@ -1337,7 +1374,10 @@
   ""
 {
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+  else
+    emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
   emit_insn (gen_sminv2sf3 (operands[0], operands[1], tmp));
   DONE;
 })
@@ -1403,10 +1443,7 @@
   if (!fr_reg_or_fp01_operand (op2, SFmode))
     op2 = force_reg (SFmode, op2);
 
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_fpack (operands[0], op2, op1));
-  else
-    emit_insn (gen_fpack (operands[0], op1, op2));
+  emit_insn (gen_fpack (operands[0], op1, op2));
   DONE;
 })
 
@@ -1416,7 +1453,13 @@
 	  (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
 	  (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
   ""
-  "fpack %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fpack %0 = %F1, %F2";
+  else
+    return "%,fpack %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "fswap"
@@ -1427,7 +1470,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 1) (const_int 2)])))]
   ""
-  "fswap %0 = %F1, %F2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fswap %0 = %F2, %F1";
+  else
+    return "%,fswap %0 = %F1, %F2";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "vec_interleave_highv2sf"
@@ -1438,7 +1487,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 1) (const_int 3)])))]
   ""
-  "fmix.l %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fmix.r %0 = %F1, %F2";
+  else
+    return "%,fmix.l %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "vec_interleave_lowv2sf"
@@ -1449,7 +1504,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 0) (const_int 2)])))]
   ""
-  "fmix.r %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fmix.l %0 = %F1, %F2";
+  else
+    return "%,fmix.r %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "fmix_lr"
@@ -1460,7 +1521,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 0) (const_int 3)])))]
   ""
-  "fmix.lr %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fmix.lr %0 = %F1, %F2";
+  else
+    return "%,fmix.lr %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_expand "vec_extract_evenv2sf"
@@ -1485,23 +1552,24 @@
   DONE;
 })
 
-
 (define_expand "vec_setv2sf"
   [(match_operand:V2SF 0 "fr_register_operand" "")
    (match_operand:SF 1 "fr_register_operand" "")
    (match_operand 2 "const_int_operand" "")]
   ""
 {
+  rtx op0 = operands[0];
   rtx tmp = gen_reg_rtx (V2SFmode);
+
   emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
 
   switch (INTVAL (operands[2]))
     {
     case 0:
-      emit_insn (gen_fmix_lr (operands[0], tmp, operands[0]));
+      emit_insn (gen_fmix_lr (op0, tmp, op0));
       break;
     case 1:
-      emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[0], tmp));
+      emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
       break;
     default:
       gcc_unreachable ();
@@ -1528,8 +1596,8 @@
 })
 
 (define_insn_and_split "*vec_extractv2sf_0_be"
-  [(set (match_operand:SF 0 "register_operand" "=r,f")
-	(unspec:SF [(match_operand:V2SF 1 "register_operand" "rf,r")
+  [(set (match_operand:SF 0 "register_operand" "=rf,r")
+	(unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "m,r")
 		    (const_int 0)]
 		   UNSPEC_VECT_EXTR))]
   "TARGET_BIG_ENDIAN"
@@ -1537,31 +1605,44 @@
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))]
 {
-  if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
-    operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0]));
+  if (MEM_P (operands[1]))
+    operands[1] = adjust_address (operands[1], SFmode, 0);
   else
-    operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+    {
+      emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+      DONE;
+    }
 })
 
-(define_insn_and_split "*vec_extractv2sf_1"
+(define_insn_and_split "*vec_extractv2sf_1_le"
   [(set (match_operand:SF 0 "register_operand" "=r")
 	(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
 		    (const_int 1)]
 		   UNSPEC_VECT_EXTR))]
-  ""
+  "!TARGET_BIG_ENDIAN"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
 {
   operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
   operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
-  if (TARGET_BIG_ENDIAN)
-    emit_move_insn (operands[0], operands[1]);
-  else
-    emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+  emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
   DONE;
 })
 
+(define_insn_and_split "*vec_extractv2sf_1_be"
+  [(set (match_operand:SF 0 "register_operand" "=rf")
+	(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
+		    (const_int 1)]
+		   UNSPEC_VECT_EXTR))]
+  "TARGET_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+})
+
 (define_expand "vec_extractv2sf"
   [(set (match_operand:SF 0 "register_operand" "")
 	(unspec:SF [(match_operand:V2SF 1 "register_operand" "")
@@ -1610,11 +1691,14 @@
   [(match_operand:V8QI 0 "gr_register_operand" "")
    (match_operand:V4HI 1 "gr_register_operand" "")
    (match_operand:V4HI 2 "gr_register_operand" "")]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
-  rtx op1 = gen_lowpart(V8QImode, operands[1]);
-  rtx op2 = gen_lowpart(V8QImode, operands[2]);
-  emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
+  rtx op1 = gen_lowpart (V8QImode, operands[1]);
+  rtx op2 = gen_lowpart (V8QImode, operands[2]);
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
+  else
+    emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
   DONE;
 })
 
@@ -1624,8 +1708,8 @@
    (match_operand:V2SI 2 "gr_register_operand" "")]
   ""
 {
-  rtx op1 = gen_lowpart(V4HImode, operands[1]);
-  rtx op2 = gen_lowpart(V4HImode, operands[2]);
+  rtx op1 = gen_lowpart (V4HImode, operands[1]);
+  rtx op2 = gen_lowpart (V4HImode, operands[2]);
   if (TARGET_BIG_ENDIAN)
     emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
   else
diff --git a/gcc/config/microblaze/microblaze.opt b/gcc/config/microblaze/microblaze.opt
index 57380999626..488bd9c2bec 100644
--- a/gcc/config/microblaze/microblaze.opt
+++ b/gcc/config/microblaze/microblaze.opt
@@ -1,6 +1,6 @@
 ; Options for the MicroBlaze port of the compiler
 ;
-; Copyright 2009, 2010 Free Software Foundation, Inc.
+; Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
 ;
 ; Contributed by Michael Eager <eager@eagercon.com>.
 ;
@@ -20,6 +20,21 @@
 ; along with GCC; see the file COPYING3.  If not see
 ; <http://www.gnu.org/licenses/>.  */
 
+Zxl-mode-bootstrap
+Driver
+
+Zxl-mode-executable
+Driver
+
+Zxl-mode-novectors
+Driver
+
+Zxl-mode-xilkernel
+Driver
+
+Zxl-mode-xmdstub
+Driver
+
 msoft-float
 Target Report RejectNegative Mask(SOFT_FLOAT)
 Use software emulation for floating point (default)
@@ -95,3 +110,6 @@ Description for mxl-mode-bootstrap
 mxl-mode-novectors
 Target Mask(XL_MODE_NOVECTORS)
 Description for mxl-mode-novectors
+
+mxl-mode-xilkernel
+Driver
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 6ec3ddf5ba5..34ff9845655 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -1,6 +1,6 @@
 ; Options for the MIPS port of the compiler
 ;
-; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+; Copyright (C) 2005, 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
 ;
 ; This file is part of GCC.
 ;
@@ -18,6 +18,12 @@
 ; along with GCC; see the file COPYING3.  If not see
 ; <http://www.gnu.org/licenses/>.
 
+EB
+Driver
+
+EL
+Driver
+
 mabi=
 Target RejectNegative Joined
 -mabi=ABI	Generate code that conforms to the given ABI
@@ -299,3 +305,6 @@ Perform VR4130-specific alignment optimizations
 mxgot
 Target Report Var(TARGET_XGOT)
 Lift restrictions on GOT size
+
+noasmopt
+Driver
diff --git a/gcc/config/pa/pa-hpux.opt b/gcc/config/pa/pa-hpux.opt
index 4a656268166..eaed8be2d97 100644
--- a/gcc/config/pa/pa-hpux.opt
+++ b/gcc/config/pa/pa-hpux.opt
@@ -1,6 +1,6 @@
 ; Options for the HP PA-RISC port of the compiler.
 
-; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc.
 ;
 ; This file is part of GCC.
 ;
@@ -29,3 +29,9 @@ Specify UNIX standard for predefines and linking
 mwsio
 Target RejectNegative InverseMask(SIO)
 Generate cpp defines for workstation IO
+
+nolibdld
+Driver
+
+rdynamic
+Driver
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 93dd4f9a282..583731b9668 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -1,5 +1,5 @@
 /* PowerPC AltiVec include file.
-   Copyright (C) 2002, 2003, 2004, 2005, 2008, 2009, 2010
+   Copyright (C) 2002, 2003, 2004, 2005, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Aldy Hernandez (aldyh@redhat.com).
    Rewritten by Paolo Bonzini (bonzini@gnu.org).
@@ -318,6 +318,8 @@
 #define vec_nearbyint __builtin_vec_nearbyint
 #define vec_rint __builtin_vec_rint
 #define vec_sqrt __builtin_vec_sqrt
+#define vec_vsx_ld __builtin_vec_vsx_ld
+#define vec_vsx_st __builtin_vec_vsx_st
 #endif
 
 /* Predicates.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index d21d5762ce5..d7357ee3262 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1,5 +1,5 @@
 ;; AltiVec patterns.
-;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;; Contributed by Aldy Hernandez (aldy@quesejoda.com)
 
@@ -96,7 +96,7 @@
    (UNSPEC_STVE         203)
    (UNSPEC_SET_VSCR     213)
    (UNSPEC_GET_VRSAVE   214)
-   ;; 215 deleted
+   (UNSPEC_LVX		215)
    (UNSPEC_REDUC_PLUS   217)
    (UNSPEC_VECSH        219)
    (UNSPEC_EXTEVEN_V4SI 220)
@@ -1750,17 +1750,19 @@
   "lvxl %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_insn "altivec_lvx"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-	(match_operand:V4SI 1 "memory_operand" "Z"))]
+(define_insn "altivec_lvx_<mode>"
+  [(parallel
+    [(set (match_operand:VM2 0 "register_operand" "=v")
+	  (match_operand:VM2 1 "memory_operand" "Z"))
+     (unspec [(const_int 0)] UNSPEC_LVX)])]
   "TARGET_ALTIVEC"
   "lvx %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_insn "altivec_stvx"
+(define_insn "altivec_stvx_<mode>"
   [(parallel
-    [(set (match_operand:V4SI 0 "memory_operand" "=Z")
-	  (match_operand:V4SI 1 "register_operand" "v"))
+    [(set (match_operand:VM2 0 "memory_operand" "=Z")
+	  (match_operand:VM2 1 "register_operand" "v"))
      (unspec [(const_int 0)] UNSPEC_STVX)])]
   "TARGET_ALTIVEC"
   "stvx %1,%y0"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 9f45a72e2c0..fd6321dcf96 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1,5 +1,5 @@
 /* Builtin functions for rs6000/powerpc.
-   Copyright (C) 2009, 2010
+   Copyright (C) 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
 
@@ -37,6 +37,10 @@ RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_16qi,	RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_16qi,	RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_4sf,		RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_4sf,		RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2df,		RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2df,		RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2di,		RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2di,		RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUBM,			RS6000_BTC_CONST)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUHM,			RS6000_BTC_CONST)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUWM,			RS6000_BTC_CONST)
@@ -778,12 +782,20 @@ RS6000_BUILTIN(PAIRED_BUILTIN_CMPU1,			RS6000_BTC_MISC)
 
   /* VSX builtins.  */
 RS6000_BUILTIN(VSX_BUILTIN_LXSDX,			RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_LXVD2X,			RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DF,			RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DI,			RS6000_BTC_MEM)
 RS6000_BUILTIN(VSX_BUILTIN_LXVDSX,			RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_LXVW4X,			RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SF,			RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SI,			RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V8HI,			RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V16QI,		RS6000_BTC_MEM)
 RS6000_BUILTIN(VSX_BUILTIN_STXSDX,			RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_STXVD2X,			RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_STXVW4X,			RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DF,		RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DI,		RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SF,		RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SI,		RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V8HI,		RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V16QI,		RS6000_BTC_MEM)
 RS6000_BUILTIN(VSX_BUILTIN_XSABSDP,			RS6000_BTC_CONST)
 RS6000_BUILTIN(VSX_BUILTIN_XSADDDP,			RS6000_BTC_FP_PURE)
 RS6000_BUILTIN(VSX_BUILTIN_XSCMPODP,			RS6000_BTC_FP_PURE)
@@ -983,8 +995,10 @@ RS6000_BUILTIN(VSX_BUILTIN_VEC_XXPERMDI,		RS6000_BTC_MISC)
 RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSLDWI,			RS6000_BTC_MISC)
 RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTD,			RS6000_BTC_MISC)
 RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTW,			RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_LD,			RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_ST,			RS6000_BTC_MISC)
 RS6000_BUILTIN_EQUATE(VSX_BUILTIN_OVERLOADED_LAST,
-		      VSX_BUILTIN_VEC_XXSPLTW)
+		      VSX_BUILTIN_VEC_ST)
 
 /* Combined VSX/Altivec builtins.  */
 RS6000_BUILTIN(VECTOR_BUILTIN_FLOAT_V4SI_V4SF,		RS6000_BTC_FP_PURE)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index f29373df867..3f4f90b236c 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -1000,6 +1000,15 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
     RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
   { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+    RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+    RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
     RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
     RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
@@ -1112,9 +1121,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
     RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
   { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
-    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+    RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+    RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
   { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
@@ -1133,6 +1152,17 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
   { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_long_long, 0 },
   { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
   { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
@@ -1151,6 +1181,17 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
   { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_long_long, 0 },
   { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
     RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
@@ -2644,6 +2685,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI,
     RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_NOT_OPAQUE },
   { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+    RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+  { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+    RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+  { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V2DI },
+  { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+    RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_bool_V2DI },
+  { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
     RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
   { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
     RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
@@ -2809,6 +2860,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
   { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
     RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+  { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+    RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+  { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+    RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
+  { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+    RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+  { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V2DI },
+  { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+    RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_bool_V2DI },
   { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
     RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
   { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
@@ -3002,6 +3065,135 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
     RS6000_BTI_NOT_OPAQUE },
 
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V2DI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+    RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V4SI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_long, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+    RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+    RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V8HI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+    RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V16QI, 0 },
+  { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF,
+    RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+    RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V2DI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+    RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_bool_V2DI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF,
+    RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF,
+    RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+    RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+    RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V4SI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_UINTSI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+    RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_bool_V4SI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+    RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_UINTSI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+    RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_INTSI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+    RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+    RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V8HI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_UINTHI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+    RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_bool_V8HI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+    RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_UINTHI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+    RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_INTHI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V16QI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_UINTQI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_bool_V16QI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_UINTQI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_INTQI },
+  { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+    RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_pixel_V8HI },
+
   /* Predicates.  */
   { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 3eafc166cb8..d9b6bd70cad 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -1,5 +1,6 @@
 /* Definitions of target machine for GNU compiler, for IBM RS/6000.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
+   2010, 2011
    Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
@@ -129,6 +130,7 @@ extern void rs6000_emit_parity (rtx, rtx);
 extern rtx rs6000_machopic_legitimize_pic_address (rtx, enum machine_mode,
 						   rtx);
 extern rtx rs6000_address_for_fpconvert (rtx);
+extern rtx rs6000_address_for_altivec (rtx);
 extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
 extern int rs6000_loop_align (rtx);
 #endif /* RTX_CODE */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index c5e45fb70be..df02bef54df 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3316,9 +3316,12 @@ rs6000_option_override_internal (bool global_init_p)
   /* If not explicitly specified via option, decide whether to generate indexed
      load/store instructions.  */
   if (TARGET_AVOID_XFORM == -1)
-    /* Avoid indexed addressing when targeting Power6 in order to avoid
-     the DERAT mispredict penalty.  */
-    TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB);
+    /* Avoid indexed addressing when targeting Power6 in order to avoid the
+     DERAT mispredict penalty.  However the LVE and STVE altivec instructions
+     need indexed accesses and the type used is the scalar type of the element
+     being loaded or stored.  */
+    TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
+			  && !TARGET_ALTIVEC);
 
   /* Set the -mrecip options.  */
   if (rs6000_recip_name)
@@ -11263,16 +11266,22 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
   switch (fcode)
     {
     case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
-      icode = CODE_FOR_vector_load_v16qi;
+      icode = CODE_FOR_vector_altivec_load_v16qi;
       break;
     case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
-      icode = CODE_FOR_vector_load_v8hi;
+      icode = CODE_FOR_vector_altivec_load_v8hi;
       break;
     case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
-      icode = CODE_FOR_vector_load_v4si;
+      icode = CODE_FOR_vector_altivec_load_v4si;
       break;
     case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
-      icode = CODE_FOR_vector_load_v4sf;
+      icode = CODE_FOR_vector_altivec_load_v4sf;
+      break;
+    case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
+      icode = CODE_FOR_vector_altivec_load_v2df;
+      break;
+    case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
+      icode = CODE_FOR_vector_altivec_load_v2di;
       break;
     default:
       *expandedp = false;
@@ -11316,16 +11325,22 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
   switch (fcode)
     {
     case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
-      icode = CODE_FOR_vector_store_v16qi;
+      icode = CODE_FOR_vector_altivec_store_v16qi;
       break;
     case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
-      icode = CODE_FOR_vector_store_v8hi;
+      icode = CODE_FOR_vector_altivec_store_v8hi;
       break;
     case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
-      icode = CODE_FOR_vector_store_v4si;
+      icode = CODE_FOR_vector_altivec_store_v4si;
       break;
     case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
-      icode = CODE_FOR_vector_store_v4sf;
+      icode = CODE_FOR_vector_altivec_store_v4sf;
+      break;
+    case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
+      icode = CODE_FOR_vector_altivec_store_v2df;
+      break;
+    case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
+      icode = CODE_FOR_vector_altivec_store_v2di;
       break;
     default:
       *expandedp = false;
@@ -11557,7 +11572,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
   switch (fcode)
     {
     case ALTIVEC_BUILTIN_STVX:
-      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx, exp);
+      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
     case ALTIVEC_BUILTIN_STVEBX:
       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
     case ALTIVEC_BUILTIN_STVEHX:
@@ -11576,6 +11591,19 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case ALTIVEC_BUILTIN_STVRXL:
       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
 
+    case VSX_BUILTIN_STXVD2X_V2DF:
+      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
+    case VSX_BUILTIN_STXVD2X_V2DI:
+      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
+    case VSX_BUILTIN_STXVW4X_V4SF:
+      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
+    case VSX_BUILTIN_STXVW4X_V4SI:
+      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
+    case VSX_BUILTIN_STXVW4X_V8HI:
+      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
+    case VSX_BUILTIN_STXVW4X_V16QI:
+      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
+
     case ALTIVEC_BUILTIN_MFVSCR:
       icode = CODE_FOR_altivec_mfvscr;
       tmode = insn_data[icode].operand[0].mode;
@@ -11700,7 +11728,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
 					exp, target, false);
     case ALTIVEC_BUILTIN_LVX:
-      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx,
+      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
 					exp, target, false);
     case ALTIVEC_BUILTIN_LVLX:
       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
@@ -11714,6 +11742,25 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case ALTIVEC_BUILTIN_LVRXL:
       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
 					exp, target, true);
+    case VSX_BUILTIN_LXVD2X_V2DF:
+      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
+					exp, target, false);
+    case VSX_BUILTIN_LXVD2X_V2DI:
+      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
+					exp, target, false);
+    case VSX_BUILTIN_LXVW4X_V4SF:
+      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
+					exp, target, false);
+    case VSX_BUILTIN_LXVW4X_V4SI:
+      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
+					exp, target, false);
+    case VSX_BUILTIN_LXVW4X_V8HI:
+      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
+					exp, target, false);
+    case VSX_BUILTIN_LXVW4X_V16QI:
+      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
+					exp, target, false);
+      break;
     default:
       break;
       /* Fall through.  */
@@ -12331,6 +12378,8 @@ rs6000_init_builtins (void)
 
   long_integer_type_internal_node = long_integer_type_node;
   long_unsigned_type_internal_node = long_unsigned_type_node;
+  long_long_integer_type_internal_node = long_long_integer_type_node;
+  long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
   intQI_type_internal_node = intQI_type_node;
   uintQI_type_internal_node = unsigned_intQI_type_node;
   intHI_type_internal_node = intHI_type_node;
@@ -12340,7 +12389,7 @@ rs6000_init_builtins (void)
   intDI_type_internal_node = intDI_type_node;
   uintDI_type_internal_node = unsigned_intDI_type_node;
   float_type_internal_node = float_type_node;
-  double_type_internal_node = float_type_node;
+  double_type_internal_node = double_type_node;
   void_type_internal_node = void_type_node;
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -12872,19 +12921,11 @@ altivec_init_builtins (void)
   size_t i;
   tree ftype;
 
-  tree pfloat_type_node = build_pointer_type (float_type_node);
-  tree pint_type_node = build_pointer_type (integer_type_node);
-  tree pshort_type_node = build_pointer_type (short_integer_type_node);
-  tree pchar_type_node = build_pointer_type (char_type_node);
-
   tree pvoid_type_node = build_pointer_type (void_type_node);
 
-  tree pcfloat_type_node = build_pointer_type (build_qualified_type (float_type_node, TYPE_QUAL_CONST));
-  tree pcint_type_node = build_pointer_type (build_qualified_type (integer_type_node, TYPE_QUAL_CONST));
-  tree pcshort_type_node = build_pointer_type (build_qualified_type (short_integer_type_node, TYPE_QUAL_CONST));
-  tree pcchar_type_node = build_pointer_type (build_qualified_type (char_type_node, TYPE_QUAL_CONST));
-
-  tree pcvoid_type_node = build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST));
+  tree pcvoid_type_node
+    = build_pointer_type (build_qualified_type (void_type_node,
+						TYPE_QUAL_CONST));
 
   tree int_ftype_opaque
     = build_function_type_list (integer_type_node,
@@ -12907,26 +12948,6 @@ altivec_init_builtins (void)
     = build_function_type_list (integer_type_node,
 				integer_type_node, V4SI_type_node,
 				V4SI_type_node, NULL_TREE);
-  tree v4sf_ftype_pcfloat
-    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
-  tree void_ftype_pfloat_v4sf
-    = build_function_type_list (void_type_node,
-				pfloat_type_node, V4SF_type_node, NULL_TREE);
-  tree v4si_ftype_pcint
-    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
-  tree void_ftype_pint_v4si
-    = build_function_type_list (void_type_node,
-				pint_type_node, V4SI_type_node, NULL_TREE);
-  tree v8hi_ftype_pcshort
-    = build_function_type_list (V8HI_type_node, pcshort_type_node, NULL_TREE);
-  tree void_ftype_pshort_v8hi
-    = build_function_type_list (void_type_node,
-				pshort_type_node, V8HI_type_node, NULL_TREE);
-  tree v16qi_ftype_pcchar
-    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
-  tree void_ftype_pchar_v16qi
-    = build_function_type_list (void_type_node,
-				pchar_type_node, V16QI_type_node, NULL_TREE);
   tree void_ftype_v4si
     = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
   tree v8hi_ftype_void
@@ -12938,16 +12959,32 @@ altivec_init_builtins (void)
 
   tree opaque_ftype_long_pcvoid
     = build_function_type_list (opaque_V4SI_type_node,
-				long_integer_type_node, pcvoid_type_node, NULL_TREE);
+				long_integer_type_node, pcvoid_type_node,
+				NULL_TREE);
   tree v16qi_ftype_long_pcvoid
     = build_function_type_list (V16QI_type_node,
-				long_integer_type_node, pcvoid_type_node, NULL_TREE);
+				long_integer_type_node, pcvoid_type_node,
+				NULL_TREE);
   tree v8hi_ftype_long_pcvoid
     = build_function_type_list (V8HI_type_node,
-				long_integer_type_node, pcvoid_type_node, NULL_TREE);
+				long_integer_type_node, pcvoid_type_node,
+				NULL_TREE);
   tree v4si_ftype_long_pcvoid
     = build_function_type_list (V4SI_type_node,
-				long_integer_type_node, pcvoid_type_node, NULL_TREE);
+				long_integer_type_node, pcvoid_type_node,
+				NULL_TREE);
+  tree v4sf_ftype_long_pcvoid
+    = build_function_type_list (V4SF_type_node,
+				long_integer_type_node, pcvoid_type_node,
+				NULL_TREE);
+  tree v2df_ftype_long_pcvoid
+    = build_function_type_list (V2DF_type_node,
+				long_integer_type_node, pcvoid_type_node,
+				NULL_TREE);
+  tree v2di_ftype_long_pcvoid
+    = build_function_type_list (V2DI_type_node,
+				long_integer_type_node, pcvoid_type_node,
+				NULL_TREE);
 
   tree void_ftype_opaque_long_pvoid
     = build_function_type_list (void_type_node,
@@ -12965,6 +13002,18 @@ altivec_init_builtins (void)
     = build_function_type_list (void_type_node,
 				V8HI_type_node, long_integer_type_node,
 				pvoid_type_node, NULL_TREE);
+  tree void_ftype_v4sf_long_pvoid
+    = build_function_type_list (void_type_node,
+				V4SF_type_node, long_integer_type_node,
+				pvoid_type_node, NULL_TREE);
+  tree void_ftype_v2df_long_pvoid
+    = build_function_type_list (void_type_node,
+				V2DF_type_node, long_integer_type_node,
+				pvoid_type_node, NULL_TREE);
+  tree void_ftype_v2di_long_pvoid
+    = build_function_type_list (void_type_node,
+				V2DI_type_node, long_integer_type_node,
+				pvoid_type_node, NULL_TREE);
   tree int_ftype_int_v8hi_v8hi
     = build_function_type_list (integer_type_node,
 				integer_type_node, V8HI_type_node,
@@ -12996,22 +13045,6 @@ altivec_init_builtins (void)
 				pcvoid_type_node, integer_type_node,
 				integer_type_node, NULL_TREE);
 
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4sf", v4sf_ftype_pcfloat,
-	       ALTIVEC_BUILTIN_LD_INTERNAL_4sf);
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4sf", void_ftype_pfloat_v4sf,
-	       ALTIVEC_BUILTIN_ST_INTERNAL_4sf);
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4si", v4si_ftype_pcint,
-	       ALTIVEC_BUILTIN_LD_INTERNAL_4si);
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4si", void_ftype_pint_v4si,
-	       ALTIVEC_BUILTIN_ST_INTERNAL_4si);
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_8hi", v8hi_ftype_pcshort,
-	       ALTIVEC_BUILTIN_LD_INTERNAL_8hi);
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_8hi", void_ftype_pshort_v8hi,
-	       ALTIVEC_BUILTIN_ST_INTERNAL_8hi);
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_16qi", v16qi_ftype_pcchar,
-	       ALTIVEC_BUILTIN_LD_INTERNAL_16qi);
-  def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_16qi", void_ftype_pchar_v16qi,
-	       ALTIVEC_BUILTIN_ST_INTERNAL_16qi);
   def_builtin (MASK_ALTIVEC, "__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
   def_builtin (MASK_ALTIVEC, "__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
   def_builtin (MASK_ALTIVEC, "__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
@@ -13043,6 +13076,35 @@ altivec_init_builtins (void)
   def_builtin (MASK_ALTIVEC, "__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
   def_builtin (MASK_ALTIVEC, "__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
 
+  def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
+	       VSX_BUILTIN_LXVD2X_V2DF);
+  def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
+	       VSX_BUILTIN_LXVD2X_V2DI);
+  def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
+	       VSX_BUILTIN_LXVW4X_V4SF);
+  def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
+	       VSX_BUILTIN_LXVW4X_V4SI);
+  def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v8hi",
+	       v8hi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V8HI);
+  def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v16qi",
+	       v16qi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V16QI);
+  def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2df",
+	       void_ftype_v2df_long_pvoid, VSX_BUILTIN_STXVD2X_V2DF);
+  def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2di",
+	       void_ftype_v2di_long_pvoid, VSX_BUILTIN_STXVD2X_V2DI);
+  def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4sf",
+	       void_ftype_v4sf_long_pvoid, VSX_BUILTIN_STXVW4X_V4SF);
+  def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4si",
+	       void_ftype_v4si_long_pvoid, VSX_BUILTIN_STXVW4X_V4SI);
+  def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v8hi",
+	       void_ftype_v8hi_long_pvoid, VSX_BUILTIN_STXVW4X_V8HI);
+  def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v16qi",
+	       void_ftype_v16qi_long_pvoid, VSX_BUILTIN_STXVW4X_V16QI);
+  def_builtin (MASK_VSX, "__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
+	       VSX_BUILTIN_VEC_LD);
+  def_builtin (MASK_VSX, "__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
+	       VSX_BUILTIN_VEC_ST);
+
   if (rs6000_cpu == PROCESSOR_CELL)
     {
       def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvlx",  v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
@@ -27925,4 +27987,29 @@ rs6000_address_for_fpconvert (rtx x)
   return x;
 }
 
+/* Given a memory reference, if it is not in the form for altivec memory
+   reference instructions (i.e. reg or reg+reg addressing with AND of -16),
+   convert to the altivec format.  */
+
+rtx
+rs6000_address_for_altivec (rtx x)
+{
+  gcc_assert (MEM_P (x));
+  if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
+    {
+      rtx addr = XEXP (x, 0);
+      int strict_p = (reload_in_progress || reload_completed);
+
+      if (!legitimate_indexed_address_p (addr, strict_p)
+	  && !legitimate_indirect_address_p (addr, strict_p))
+	addr = copy_to_mode_reg (Pmode, addr);
+
+      addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
+      x = change_address (x, GET_MODE (x), addr);
+    }
+
+  return x;
+}
+
+
 #include "gt-rs6000.h"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index fb6130ffcdb..8c76d7ce101 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1,7 +1,7 @@
 /* Definitions of target machine for GNU compiler, for IBM RS/6000.
    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
-   2010
+   2010, 2011
    Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
@@ -2368,6 +2368,8 @@ enum rs6000_builtin_type_index
   RS6000_BTI_pixel_V8HI,         /* __vector __pixel */
   RS6000_BTI_long,	         /* long_integer_type_node */
   RS6000_BTI_unsigned_long,      /* long_unsigned_type_node */
+  RS6000_BTI_long_long,	         /* long_long_integer_type_node */
+  RS6000_BTI_unsigned_long_long, /* long_long_unsigned_type_node */
   RS6000_BTI_INTQI,	         /* intQI_type_node */
   RS6000_BTI_UINTQI,		 /* unsigned_intQI_type_node */
   RS6000_BTI_INTHI,	         /* intHI_type_node */
@@ -2411,6 +2413,8 @@ enum rs6000_builtin_type_index
 #define bool_V2DI_type_node	      (rs6000_builtin_types[RS6000_BTI_bool_V2DI])
 #define pixel_V8HI_type_node	      (rs6000_builtin_types[RS6000_BTI_pixel_V8HI])
 
+#define long_long_integer_type_internal_node  (rs6000_builtin_types[RS6000_BTI_long_long])
+#define long_long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long_long])
 #define long_integer_type_internal_node  (rs6000_builtin_types[RS6000_BTI_long])
 #define long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long])
 #define intQI_type_internal_node	 (rs6000_builtin_types[RS6000_BTI_INTQI])
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 71961fbc57c..5335d9d4301 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -3,7 +3,7 @@
 ;; expander, and the actual vector instructions will be in altivec.md and
 ;; vsx.md
 
-;; Copyright (C) 2009, 2010
+;; Copyright (C) 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
 
@@ -123,6 +123,43 @@
   DONE;
 })
 
+;; Vector floating point load/store instructions that uses the Altivec
+;; instructions even if we are compiling for VSX, since the Altivec
+;; instructions silently ignore the bottom 3 bits of the address, and VSX does
+;; not.
+(define_expand "vector_altivec_load_<mode>"
+  [(set (match_operand:VEC_M 0 "vfloat_operand" "")
+	(match_operand:VEC_M 1 "memory_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+  "
+{
+  gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+  if (VECTOR_MEM_VSX_P (<MODE>mode))
+    {
+      operands[1] = rs6000_address_for_altivec (operands[1]);
+      emit_insn (gen_altivec_lvx_<mode> (operands[0], operands[1]));
+      DONE;
+    }
+}")
+
+(define_expand "vector_altivec_store_<mode>"
+  [(set (match_operand:VEC_M 0 "memory_operand" "")
+	(match_operand:VEC_M 1 "vfloat_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+  "
+{
+  gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+  if (VECTOR_MEM_VSX_P (<MODE>mode))
+    {
+      operands[0] = rs6000_address_for_altivec (operands[0]);
+      emit_insn (gen_altivec_stvx_<mode> (operands[0], operands[1]));
+      DONE;
+    }
+}")
+
+
 
 ;; Reload patterns for vector operations.  We may need an addtional base
 ;; register to convert the reg+offset addressing to reg+reg for vector
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e241e26db89..3f6da4c0b40 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1,5 +1,5 @@
 ;; VSX patterns.
-;; Copyright (C) 2009, 2010
+;; Copyright (C) 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
 
@@ -308,6 +308,19 @@
 }
   [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
 
+;; Explicit  load/store expanders for the builtin functions
+(define_expand "vsx_load_<mode>"
+  [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
+	(match_operand:VSX_M 1 "memory_operand" ""))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+  "")
+
+(define_expand "vsx_store_<mode>"
+  [(set (match_operand:VEC_M 0 "memory_operand" "")
+	(match_operand:VEC_M 1 "vsx_register_operand" ""))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+  "")
+
 
 ;; VSX scalar and vector floating point arithmetic instructions
 (define_insn "*vsx_add<mode>3"
@@ -695,33 +708,34 @@
 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
+;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md.
 (define_insn "vsx_float<VSi><mode>2"
-  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
-	(float:VSX_B (match_operand:<VSI> 1 "vsx_register_operand" "<VSr2>,<VSr3>")))]
+  [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
+	(float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "x<VSv>cvsx<VSc><VSs> %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
    (set_attr "fp_type" "<VSfptype_simple>")])
 
 (define_insn "vsx_floatuns<VSi><mode>2"
-  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
-	(unsigned_float:VSX_B (match_operand:<VSI> 1 "vsx_register_operand" "<VSr2>,<VSr3>")))]
+  [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
+	(unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "x<VSv>cvux<VSc><VSs> %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
    (set_attr "fp_type" "<VSfptype_simple>")])
 
 (define_insn "vsx_fix_trunc<mode><VSi>2"
-  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=<VSr2>,?<VSr3>")
-	(fix:<VSI> (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
+	(fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
    (set_attr "fp_type" "<VSfptype_simple>")])
 
 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
-  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=<VSr2>,?<VSr3>")
-	(unsigned_fix:<VSI> (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
+	(unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
diff --git a/gcc/config/rx/predicates.md b/gcc/config/rx/predicates.md
index 77b3353ac3e..82cac42dadf 100644
--- a/gcc/config/rx/predicates.md
+++ b/gcc/config/rx/predicates.md
@@ -284,7 +284,7 @@
 )
 
 (define_predicate "rx_zs_comparison_operator"
-  (match_code "eq,ne,lt,ge")
+  (match_code "eq,ne")
 )
 
 ;; GT and LE omitted due to operand swap required.
diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
index 81991067b83..839523fc873 100644
--- a/gcc/config/rx/rx.c
+++ b/gcc/config/rx/rx.c
@@ -450,10 +450,10 @@ rx_print_operand (FILE * file, rtx op, int letter)
 	    switch (code)
 	      {
 	      case LT:
-		ret = "n";
+		ret = "lt";
 		break;
 	      case GE:
-		ret = "pz";
+		ret = "ge";
 		break;
 	      case GT:
 		ret = "gt";
@@ -2625,7 +2625,7 @@ flags_from_code (enum rtx_code code)
     {
     case LT:
     case GE:
-      return CC_FLAG_S;
+      return CC_FLAG_S | CC_FLAG_O;
     case GT:
     case LE:
       return CC_FLAG_S | CC_FLAG_O | CC_FLAG_Z;
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index 99b46b5b2e2..7629a886e5c 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -797,7 +797,10 @@
    (set (reg CC_REG)
 	(compare (abs:SI (match_dup 1))
 		 (const_int 0)))]
-  "reload_completed && rx_match_ccmode (insn, CC_ZSOmode)"
+  ;; Note - although the ABS instruction does set the O bit in the processor
+  ;; status word, it does not do so in a way that is comparable with the CMP
+  ;; instruction.  Hence we use CC_ZSmode rather than CC_ZSOmode.
+  "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   abs\t%0
   abs\t%1, %0"
diff --git a/gcc/config/sparc/freebsd.h b/gcc/config/sparc/freebsd.h
index 417357f13fa..f2fb65e33fb 100644
--- a/gcc/config/sparc/freebsd.h
+++ b/gcc/config/sparc/freebsd.h
@@ -1,5 +1,5 @@
 /* Definitions for Sun SPARC64 running FreeBSD using the ELF format
-   Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2010
+   Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi.
 
@@ -30,6 +30,9 @@ along with GCC; see the file COPYING3.  If not see
 #define CPP_CPU64_DEFAULT_SPEC \
   "-D__sparc64__ -D__sparc_v9__ -D__sparcv9 -D__arch64__"
 
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)"
+
 #define LINK_SPEC "%(link_arch)						\
   %{!mno-relax:%{!r:-relax}}						\
   %{p:%nconsider using '-pg' instead of '-p' with gprof(1)}		\
diff --git a/gcc/config/sparc/sol2-bi.h b/gcc/config/sparc/sol2-bi.h
index 48c9a0d11d0..bd9cd70487c 100644
--- a/gcc/config/sparc/sol2-bi.h
+++ b/gcc/config/sparc/sol2-bi.h
@@ -229,14 +229,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #undef	CC1_SPEC
 #if DEFAULT_ARCH32_P
 #define CC1_SPEC "\
-%{sun4:} %{target:} \
 %{m64:%{m32:%emay not use both -m32 and -m64}} \
 %{m64:-mptr64 -mstack-bias -mno-v8plus \
   %{!mcpu*:-%{!mv8plus:mcpu=v9}}} \
 "
 #else
 #define CC1_SPEC "\
-%{sun4:} %{target:} \
 %{m32:%{m64:%emay not use both -m32 and -m64}} \
 %{m32:-mptr32 -mno-stack-bias \
   %{!mcpu*:%{!mv8plus:-mcpu=v9}}} \
diff --git a/gcc/config/sparc/vxworks.h b/gcc/config/sparc/vxworks.h
index 4d971a4bbc8..e1b596e7f79 100644
--- a/gcc/config/sparc/vxworks.h
+++ b/gcc/config/sparc/vxworks.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler,
    for SPARC targeting the VxWorks run time environment.
-   Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2007, 2010, 2011 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -33,6 +33,9 @@ along with GCC; see the file COPYING3.  If not see
 #undef CPP_SUBTARGET_SPEC
 #define CPP_SUBTARGET_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
 
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)"
+
 #undef LIB_SPEC
 #define LIB_SPEC VXWORKS_LIB_SPEC
 #undef LINK_SPEC
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 5db4e4ef42f..0d1738f4e8b 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -1,5 +1,5 @@
 /* Prototypes of target machine for GNU compiler for Xtensa.
-   Copyright 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+   Copyright 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
 
@@ -63,11 +63,6 @@ extern void print_operand (FILE *, rtx, int);
 extern void print_operand_address (FILE *, rtx);
 extern void xtensa_output_literal (FILE *, rtx, enum machine_mode, int);
 extern rtx xtensa_return_addr (int, rtx);
-extern enum reg_class xtensa_preferred_reload_class (rtx, enum reg_class, int);
-struct secondary_reload_info;
-extern reg_class_t xtensa_secondary_reload (bool, rtx, reg_class_t,
-					    enum machine_mode,
-					    struct secondary_reload_info *);
 #endif /* RTX_CODE */
 
 extern void xtensa_setup_frame_addresses (void);
diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c
index 85dac151c3d..b1ea5bd7872 100644
--- a/gcc/config/xtensa/xtensa.c
+++ b/gcc/config/xtensa/xtensa.c
@@ -162,6 +162,12 @@ static void xtensa_asm_trampoline_template (FILE *);
 static void xtensa_trampoline_init (rtx, tree, rtx);
 static bool xtensa_output_addr_const_extra (FILE *, rtx);
 
+static reg_class_t xtensa_preferred_reload_class (rtx, reg_class_t);
+static reg_class_t xtensa_preferred_output_reload_class (rtx, reg_class_t);
+static reg_class_t xtensa_secondary_reload (bool, rtx, reg_class_t,
+					    enum machine_mode,
+					    struct secondary_reload_info *);
+
 static bool constantpool_address_p (const_rtx addr);
 
 static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
@@ -260,6 +266,11 @@ static const struct default_options xtensa_option_optimization_table[] =
 #undef  TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN xtensa_expand_builtin
 
+#undef  TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS xtensa_preferred_reload_class
+#undef  TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS xtensa_preferred_output_reload_class
+
 #undef TARGET_SECONDARY_RELOAD
 #define TARGET_SECONDARY_RELOAD xtensa_secondary_reload
 
@@ -3153,11 +3164,12 @@ xtensa_expand_builtin (tree exp, rtx target,
   return NULL_RTX;
 }
 
+/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.  */
 
-enum reg_class
-xtensa_preferred_reload_class (rtx x, enum reg_class rclass, int isoutput)
+static reg_class_t
+xtensa_preferred_reload_class (rtx x, reg_class_t rclass)
 {
-  if (!isoutput && CONSTANT_P (x) && GET_CODE (x) == CONST_DOUBLE)
+  if (CONSTANT_P (x) && CONST_DOUBLE_P (x))
     return NO_REGS;
 
   /* Don't use the stack pointer or hard frame pointer for reloads!
@@ -3172,8 +3184,27 @@ xtensa_preferred_reload_class (rtx x, enum reg_class rclass, int isoutput)
   return rclass;
 }
 
+/* Worker function for TARGET_PREFERRED_OUTPUT_RELOAD_CLASS.  */
+
+static reg_class_t
+xtensa_preferred_output_reload_class (rtx x ATTRIBUTE_UNUSED,
+				      reg_class_t rclass)
+{
+  /* Don't use the stack pointer or hard frame pointer for reloads!
+     The hard frame pointer would normally be OK except that it may
+     briefly hold an incoming argument in the prologue, and reload
+     won't know that it is live because the hard frame pointer is
+     treated specially.  */
+
+  if (rclass == AR_REGS || rclass == GR_REGS)
+    return RL_REGS;
+
+  return rclass;
+}
+
+/* Worker function for TARGET_SECONDARY_RELOAD.  */
 
-reg_class_t
+static reg_class_t
 xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
 			 enum machine_mode mode, secondary_reload_info *sri)
 {
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 3d498e1910a..098855b3c42 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -455,12 +455,6 @@ extern const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER];
    the RTL, as either incoming or outgoing arguments.  */
 #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
 
-#define PREFERRED_RELOAD_CLASS(X, CLASS)				\
-  xtensa_preferred_reload_class (X, CLASS, 0)
-
-#define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS)				\
-  xtensa_preferred_reload_class (X, CLASS, 1)
-  
 /* Return the maximum number of consecutive registers
    needed to represent mode MODE in a register of class CLASS.  */
 #define CLASS_UNITS(mode, size)						\