author:    bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2013-11-22 14:58:36 +0000
committer: bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2013-11-22 14:58:36 +0000
commit:    cfea6514a4b6ced0930593ebb48d0037e9716d87 (patch)
tree:      eeccf866e18463f7dc7ea882ea944247d4ed1010 /gcc/config
parent:    9456798d72d0e81a2a553287f436dcb05cff175a (diff)
download:  gcc-cfea6514a4b6ced0930593ebb48d0037e9716d87.tar.gz
[./]
2013-11-22 Basile Starynkevitch <basile@starynkevitch.net>
{{merge with trunk GCC 4.9 svn rev 205247 now in stage 3}}
[gcc/]
2013-11-22 Basile Starynkevitch <basile@starynkevitch.net>
{{merge with trunk GCC 4.9 svn rev 205247 now in stage 3}}
* Makefile.in (MELT_GCC_VERSION_NUM): New make variable.
(melt-run-md5.h, melt-run.h): Use it.
* melt-runtime.cc: With GCC 4.9 include print-tree.h,
gimple-iterator.h, gimple-walk.h.
(meltgc_start_all_new_modules, meltgc_start_flavored_module)
(meltgc_do_initial_mode, meltgc_set_user_options)
(meltgc_load_modules_and_do_mode): Improve debugprintf...
(melt_gt_ggc_mx_gimple_seq_d): Handle GCC 4.9 specifically.
* melt-runtime.h (gt_ggc_mx_gimple_statement_d): Temporarily
define this macro.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@205264 138bc75d-0d04-0410-961f-82ee72b054a4
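The two melt-runtime entries above describe a compatibility shim for the GCC 4.9 header and GGC-marker reshuffle. Below is a minimal sketch of what such a version gate can look like, assuming MELT_GCC_VERSION_NUM encodes the release as major * 1000 + minor; the macro names are taken from the ChangeLog, but the mapping itself is an illustrative assumption, not the actual melt-runtime.h code:

```c
/* Hedged sketch, not the actual melt-runtime.h code.  Only the two
   names come from the ChangeLog above; the body is an assumption.
   MELT_GCC_VERSION_NUM is assumed to encode the GCC release as
   major * 1000 + minor, so GCC 4.9 tests as >= 4009.  */
#if MELT_GCC_VERSION_NUM >= 4009
/* GCC 4.9 reorganised the gimple headers and markers, so temporarily
   route the old marker name to MELT's own gimple-seq walker.  */
#define gt_ggc_mx_gimple_statement_d(GS) melt_gt_ggc_mx_gimple_seq_d (GS)
#endif
```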
Diffstat (limited to 'gcc/config')
121 files changed, 5560 insertions, 4597 deletions
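Most of the gcc/config churn below is the trunk-side rewrite of the AArch64 builtin machinery: the monolithic aarch64_simd_itype enum is dropped in favour of per-argument qualifier bitmasks, and each builtin's function type is then derived operand by operand from insn_data. A self-contained C sketch of that idea follows (the qualifier values are copied from the diff; the load1 table and print_qual helper are hypothetical stand-ins for the real tree-building code):

```c
#include <stdio.h>

/* Qualifier bits as in the diff's aarch64_type_qualifiers; compound
   qualifiers are just ORs of the basic ones.  */
enum type_qualifiers
{
  qualifier_none          = 0x0,
  qualifier_unsigned      = 0x1,  /* 1 << 0 */
  qualifier_const         = 0x2,  /* 1 << 1 */
  qualifier_pointer       = 0x4,  /* 1 << 2 */
  qualifier_const_pointer = qualifier_const | qualifier_pointer  /* 0x6 */
};

/* { return type, arg1, ... }: element 0 always describes the return
   value, mirroring TYPES_LOAD1 = { none, const_pointer_map_mode }.  */
static const enum type_qualifiers load1[] =
  { qualifier_none, qualifier_const_pointer };

/* Hypothetical helper: render one qualifier set as a C-like type,
   with "T" standing in for the mode-derived base type.  */
static void
print_qual (enum type_qualifiers q)
{
  printf ("%s%sT%s",
          (q & qualifier_const) ? "const " : "",
          (q & qualifier_unsigned) ? "unsigned " : "",
          (q & qualifier_pointer) ? " *" : "");
}

int
main (void)
{
  /* Walk the qualifier list the way aarch64_init_simd_builtins walks
     insn operands: entry 0 is the return type, the rest are args.  */
  print_qual (load1[0]);
  printf (" load1 (");
  print_qual (load1[1]);
  printf (")\n");  /* prints: T load1 (const T *) */
  return 0;
}
```

The expansion side uses the same data: qualifier_immediate (and qualifier_maybe_immediate plus a predicate check) decides per argument whether to emit SIMD_ARG_CONSTANT or SIMD_ARG_COPY_TO_REG, replacing the long switch over itypes in aarch64_simd_expand_builtin.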
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index b66e33ec932..683c34c1ec4 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -26,4 +26,4 @@ this architecture. ARCH is the architecture revision. FLAGS are the flags implied by the architecture. */ -AARCH64_ARCH("armv8-a", generic, 8, AARCH64_FL_FOR_ARCH8) +AARCH64_ARCH("armv8-a", cortexa53, 8, AARCH64_FL_FOR_ARCH8) diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 0df5b3b0d77..fec7b222529 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -24,6 +24,9 @@ #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "calls.h" #include "expr.h" #include "tm_p.h" #include "recog.h" @@ -31,6 +34,7 @@ #include "diagnostic-core.h" #include "optabs.h" #include "gimple.h" +#include "gimple-iterator.h" enum aarch64_simd_builtin_type_mode { @@ -80,57 +84,101 @@ enum aarch64_simd_builtin_type_mode #define UP(X) X##_UP -typedef enum +#define SIMD_MAX_BUILTIN_ARGS 5 + +enum aarch64_type_qualifiers { - AARCH64_SIMD_BINOP, - AARCH64_SIMD_TERNOP, - AARCH64_SIMD_QUADOP, - AARCH64_SIMD_UNOP, - AARCH64_SIMD_GETLANE, - AARCH64_SIMD_SETLANE, - AARCH64_SIMD_CREATE, - AARCH64_SIMD_DUP, - AARCH64_SIMD_DUPLANE, - AARCH64_SIMD_COMBINE, - AARCH64_SIMD_SPLIT, - AARCH64_SIMD_LANEMUL, - AARCH64_SIMD_LANEMULL, - AARCH64_SIMD_LANEMULH, - AARCH64_SIMD_LANEMAC, - AARCH64_SIMD_SCALARMUL, - AARCH64_SIMD_SCALARMULL, - AARCH64_SIMD_SCALARMULH, - AARCH64_SIMD_SCALARMAC, - AARCH64_SIMD_CONVERT, - AARCH64_SIMD_FIXCONV, - AARCH64_SIMD_SELECT, - AARCH64_SIMD_RESULTPAIR, - AARCH64_SIMD_REINTERP, - AARCH64_SIMD_VTBL, - AARCH64_SIMD_VTBX, - AARCH64_SIMD_LOAD1, - AARCH64_SIMD_LOAD1LANE, - AARCH64_SIMD_STORE1, - AARCH64_SIMD_STORE1LANE, - AARCH64_SIMD_LOADSTRUCT, - AARCH64_SIMD_LOADSTRUCTLANE, - AARCH64_SIMD_STORESTRUCT, - AARCH64_SIMD_STORESTRUCTLANE, - AARCH64_SIMD_LOGICBINOP, - AARCH64_SIMD_SHIFTINSERT, - AARCH64_SIMD_SHIFTIMM, - AARCH64_SIMD_SHIFTACC -} aarch64_simd_itype; + /* T foo. */ + qualifier_none = 0x0, + /* unsigned T foo. */ + qualifier_unsigned = 0x1, /* 1 << 0 */ + /* const T foo. */ + qualifier_const = 0x2, /* 1 << 1 */ + /* T *foo. */ + qualifier_pointer = 0x4, /* 1 << 2 */ + /* const T *foo. */ + qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */ + /* Used when expanding arguments if an operand could + be an immediate. */ + qualifier_immediate = 0x8, /* 1 << 3 */ + qualifier_maybe_immediate = 0x10, /* 1 << 4 */ + /* void foo (...). */ + qualifier_void = 0x20, /* 1 << 5 */ + /* Some patterns may have internal operands, this qualifier is an + instruction to the initialisation code to skip this operand. */ + qualifier_internal = 0x40, /* 1 << 6 */ + /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum + rather than using the type of the operand. 
*/ + qualifier_map_mode = 0x80, /* 1 << 7 */ + /* qualifier_pointer | qualifier_map_mode */ + qualifier_pointer_map_mode = 0x84, + /* qualifier_const_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86 +}; typedef struct { const char *name; - const aarch64_simd_itype itype; enum aarch64_simd_builtin_type_mode mode; const enum insn_code code; unsigned int fcode; + enum aarch64_type_qualifiers *qualifiers; } aarch64_simd_builtin_datum; +static enum aarch64_type_qualifiers +aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none }; +#define TYPES_UNOP (aarch64_types_unop_qualifiers) +#define TYPES_CREATE (aarch64_types_unop_qualifiers) +#define TYPES_REINTERP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; +#define TYPES_BINOP (aarch64_types_binop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_TERNOP (aarch64_types_ternop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_none }; +#define TYPES_QUADOP (aarch64_types_quadop_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_GETLANE (aarch64_types_getlane_qualifiers) +#define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_SETLANE (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTINSERT (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_COMBINE (aarch64_types_combine_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_const_pointer_map_mode }; +#define TYPES_LOAD1 (aarch64_types_load1_qualifiers) +#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers) + +/* The first argument (return type) of a store should be void type, + which we represent with qualifier_void. Their first operand will be + a DImode pointer to the location to store to, so we must use + qualifier_map_mode | qualifier_pointer to build a pointer to the + element type of the vector. 
*/ +static enum aarch64_type_qualifiers +aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; +#define TYPES_STORE1 (aarch64_types_store1_qualifiers) +#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) + #define CF0(N, X) CODE_FOR_aarch64_##N##X #define CF1(N, X) CODE_FOR_##N##X##1 #define CF2(N, X) CODE_FOR_##N##X##2 @@ -139,7 +187,7 @@ typedef struct #define CF10(N, X) CODE_FOR_##N##X #define VAR1(T, N, MAP, A) \ - {#N, AARCH64_SIMD_##T, UP (A), CF##MAP (N, A), 0}, + {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T}, #define VAR2(T, N, MAP, A, B) \ VAR1 (T, N, MAP, A) \ VAR1 (T, N, MAP, B) @@ -278,118 +326,175 @@ static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; #define NUM_DREG_TYPES 6 #define NUM_QREG_TYPES 6 +/* Return a tree for a signed or unsigned argument of either + the mode specified by MODE, or the inner mode of MODE. */ +tree +aarch64_build_scalar_type (enum machine_mode mode, bool unsigned_p) +{ +#undef INT_TYPES +#define INT_TYPES \ + AARCH64_TYPE_BUILDER (QI) \ + AARCH64_TYPE_BUILDER (HI) \ + AARCH64_TYPE_BUILDER (SI) \ + AARCH64_TYPE_BUILDER (DI) \ + AARCH64_TYPE_BUILDER (EI) \ + AARCH64_TYPE_BUILDER (OI) \ + AARCH64_TYPE_BUILDER (CI) \ + AARCH64_TYPE_BUILDER (XI) \ + AARCH64_TYPE_BUILDER (TI) \ + +/* Statically declare all the possible types we might need. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; + + INT_TYPES + + static tree float_aarch64_type_node = NULL; + static tree double_aarch64_type_node = NULL; + + gcc_assert (!VECTOR_MODE_P (mode)); + +/* If we've already initialised this type, don't initialise it again, + otherwise ask for a new type of the correct size. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return (X##_aarch64_type_node_u \ + ? X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else \ + return (X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = make_signed_type (GET_MODE_PRECISION (mode))); \ + break; + + switch (mode) + { + INT_TYPES + case SFmode: + if (!float_aarch64_type_node) + { + float_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (float_aarch64_type_node) = FLOAT_TYPE_SIZE; + layout_type (float_aarch64_type_node); + } + return float_aarch64_type_node; + break; + case DFmode: + if (!double_aarch64_type_node) + { + double_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (double_aarch64_type_node) = DOUBLE_TYPE_SIZE; + layout_type (double_aarch64_type_node); + } + return double_aarch64_type_node; + break; + default: + gcc_unreachable (); + } +} + +tree +aarch64_build_vector_type (enum machine_mode mode, bool unsigned_p) +{ + tree eltype; + +#define VECTOR_TYPES \ + AARCH64_TYPE_BUILDER (V16QI) \ + AARCH64_TYPE_BUILDER (V8HI) \ + AARCH64_TYPE_BUILDER (V4SI) \ + AARCH64_TYPE_BUILDER (V2DI) \ + AARCH64_TYPE_BUILDER (V8QI) \ + AARCH64_TYPE_BUILDER (V4HI) \ + AARCH64_TYPE_BUILDER (V2SI) \ + \ + AARCH64_TYPE_BUILDER (V4SF) \ + AARCH64_TYPE_BUILDER (V2DF) \ + AARCH64_TYPE_BUILDER (V2SF) \ +/* Declare our "cache" of values. 
*/ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; + + VECTOR_TYPES + + gcc_assert (VECTOR_MODE_P (mode)); + +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return X##_aarch64_type_node_u \ + ? X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p), mode); \ + else \ + return X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p), mode); \ + break; + + switch (mode) + { + default: + eltype = aarch64_build_scalar_type (GET_MODE_INNER (mode), unsigned_p); + return build_vector_type_for_mode (eltype, mode); + break; + VECTOR_TYPES + } +} + +tree +aarch64_build_type (enum machine_mode mode, bool unsigned_p) +{ + if (VECTOR_MODE_P (mode)) + return aarch64_build_vector_type (mode, unsigned_p); + else + return aarch64_build_scalar_type (mode, unsigned_p); +} + static void aarch64_init_simd_builtins (void) { unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1; - /* Scalar type nodes. */ - tree aarch64_simd_intQI_type_node; - tree aarch64_simd_intHI_type_node; - tree aarch64_simd_polyQI_type_node; - tree aarch64_simd_polyHI_type_node; - tree aarch64_simd_intSI_type_node; - tree aarch64_simd_intDI_type_node; - tree aarch64_simd_float_type_node; - tree aarch64_simd_double_type_node; - - /* Pointer to scalar type nodes. */ - tree intQI_pointer_node; - tree intHI_pointer_node; - tree intSI_pointer_node; - tree intDI_pointer_node; - tree float_pointer_node; - tree double_pointer_node; - - /* Const scalar type nodes. */ - tree const_intQI_node; - tree const_intHI_node; - tree const_intSI_node; - tree const_intDI_node; - tree const_float_node; - tree const_double_node; - - /* Pointer to const scalar type nodes. */ - tree const_intQI_pointer_node; - tree const_intHI_pointer_node; - tree const_intSI_pointer_node; - tree const_intDI_pointer_node; - tree const_float_pointer_node; - tree const_double_pointer_node; - - /* Vector type nodes. */ - tree V8QI_type_node; - tree V4HI_type_node; - tree V2SI_type_node; - tree V2SF_type_node; - tree V16QI_type_node; - tree V8HI_type_node; - tree V4SI_type_node; - tree V4SF_type_node; - tree V2DI_type_node; - tree V2DF_type_node; - - /* Scalar unsigned type nodes. */ - tree intUQI_type_node; - tree intUHI_type_node; - tree intUSI_type_node; - tree intUDI_type_node; - - /* Opaque integer types for structures of vectors. */ - tree intEI_type_node; - tree intOI_type_node; - tree intCI_type_node; - tree intXI_type_node; - - /* Pointer to vector type nodes. */ - tree V8QI_pointer_node; - tree V4HI_pointer_node; - tree V2SI_pointer_node; - tree V2SF_pointer_node; - tree V16QI_pointer_node; - tree V8HI_pointer_node; - tree V4SI_pointer_node; - tree V4SF_pointer_node; - tree V2DI_pointer_node; - tree V2DF_pointer_node; - - /* Operations which return results as pairs. 
*/ - tree void_ftype_pv8qi_v8qi_v8qi; - tree void_ftype_pv4hi_v4hi_v4hi; - tree void_ftype_pv2si_v2si_v2si; - tree void_ftype_pv2sf_v2sf_v2sf; - tree void_ftype_pdi_di_di; - tree void_ftype_pv16qi_v16qi_v16qi; - tree void_ftype_pv8hi_v8hi_v8hi; - tree void_ftype_pv4si_v4si_v4si; - tree void_ftype_pv4sf_v4sf_v4sf; - tree void_ftype_pv2di_v2di_v2di; - tree void_ftype_pv2df_v2df_v2df; - - tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; - tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; - tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; - - /* Create distinguished type nodes for AARCH64_SIMD vector element types, - and pointers to values of such types, so we can detect them later. */ - aarch64_simd_intQI_type_node = - make_signed_type (GET_MODE_PRECISION (QImode)); - aarch64_simd_intHI_type_node = - make_signed_type (GET_MODE_PRECISION (HImode)); - aarch64_simd_polyQI_type_node = + /* In order that 'poly' types mangle correctly they must not share + a base tree with the other scalar types, thus we must generate them + as a special case. */ + tree aarch64_simd_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); - aarch64_simd_polyHI_type_node = + tree aarch64_simd_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); - aarch64_simd_intSI_type_node = - make_signed_type (GET_MODE_PRECISION (SImode)); - aarch64_simd_intDI_type_node = - make_signed_type (GET_MODE_PRECISION (DImode)); - aarch64_simd_float_type_node = make_node (REAL_TYPE); - aarch64_simd_double_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (aarch64_simd_float_type_node) = FLOAT_TYPE_SIZE; - TYPE_PRECISION (aarch64_simd_double_type_node) = DOUBLE_TYPE_SIZE; - layout_type (aarch64_simd_float_type_node); - layout_type (aarch64_simd_double_type_node); + + /* Scalar type nodes. */ + tree aarch64_simd_intQI_type_node = aarch64_build_type (QImode, false); + tree aarch64_simd_intHI_type_node = aarch64_build_type (HImode, false); + tree aarch64_simd_intSI_type_node = aarch64_build_type (SImode, false); + tree aarch64_simd_intDI_type_node = aarch64_build_type (DImode, false); + tree aarch64_simd_intTI_type_node = aarch64_build_type (TImode, false); + tree aarch64_simd_intEI_type_node = aarch64_build_type (EImode, false); + tree aarch64_simd_intOI_type_node = aarch64_build_type (OImode, false); + tree aarch64_simd_intCI_type_node = aarch64_build_type (CImode, false); + tree aarch64_simd_intXI_type_node = aarch64_build_type (XImode, false); + tree aarch64_simd_intUQI_type_node = aarch64_build_type (QImode, true); + tree aarch64_simd_intUHI_type_node = aarch64_build_type (HImode, true); + tree aarch64_simd_intUSI_type_node = aarch64_build_type (SImode, true); + tree aarch64_simd_intUDI_type_node = aarch64_build_type (DImode, true); + + /* Float type nodes. */ + tree aarch64_simd_float_type_node = aarch64_build_type (SFmode, false); + tree aarch64_simd_double_type_node = aarch64_build_type (DFmode, false); /* Define typedefs which exactly correspond to the modes we are basing vector types on. 
If you change these names you'll need to change @@ -410,518 +515,129 @@ aarch64_init_simd_builtins (void) "__builtin_aarch64_simd_poly8"); (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyHI_type_node, "__builtin_aarch64_simd_poly16"); - - intQI_pointer_node = build_pointer_type (aarch64_simd_intQI_type_node); - intHI_pointer_node = build_pointer_type (aarch64_simd_intHI_type_node); - intSI_pointer_node = build_pointer_type (aarch64_simd_intSI_type_node); - intDI_pointer_node = build_pointer_type (aarch64_simd_intDI_type_node); - float_pointer_node = build_pointer_type (aarch64_simd_float_type_node); - double_pointer_node = build_pointer_type (aarch64_simd_double_type_node); - - /* Next create constant-qualified versions of the above types. */ - const_intQI_node = build_qualified_type (aarch64_simd_intQI_type_node, - TYPE_QUAL_CONST); - const_intHI_node = build_qualified_type (aarch64_simd_intHI_type_node, - TYPE_QUAL_CONST); - const_intSI_node = build_qualified_type (aarch64_simd_intSI_type_node, - TYPE_QUAL_CONST); - const_intDI_node = build_qualified_type (aarch64_simd_intDI_type_node, - TYPE_QUAL_CONST); - const_float_node = build_qualified_type (aarch64_simd_float_type_node, - TYPE_QUAL_CONST); - const_double_node = build_qualified_type (aarch64_simd_double_type_node, - TYPE_QUAL_CONST); - - const_intQI_pointer_node = build_pointer_type (const_intQI_node); - const_intHI_pointer_node = build_pointer_type (const_intHI_node); - const_intSI_pointer_node = build_pointer_type (const_intSI_node); - const_intDI_pointer_node = build_pointer_type (const_intDI_node); - const_float_pointer_node = build_pointer_type (const_float_node); - const_double_pointer_node = build_pointer_type (const_double_node); - - /* Now create vector types based on our AARCH64 SIMD element types. */ - /* 64-bit vectors. */ - V8QI_type_node = - build_vector_type_for_mode (aarch64_simd_intQI_type_node, V8QImode); - V4HI_type_node = - build_vector_type_for_mode (aarch64_simd_intHI_type_node, V4HImode); - V2SI_type_node = - build_vector_type_for_mode (aarch64_simd_intSI_type_node, V2SImode); - V2SF_type_node = - build_vector_type_for_mode (aarch64_simd_float_type_node, V2SFmode); - /* 128-bit vectors. */ - V16QI_type_node = - build_vector_type_for_mode (aarch64_simd_intQI_type_node, V16QImode); - V8HI_type_node = - build_vector_type_for_mode (aarch64_simd_intHI_type_node, V8HImode); - V4SI_type_node = - build_vector_type_for_mode (aarch64_simd_intSI_type_node, V4SImode); - V4SF_type_node = - build_vector_type_for_mode (aarch64_simd_float_type_node, V4SFmode); - V2DI_type_node = - build_vector_type_for_mode (aarch64_simd_intDI_type_node, V2DImode); - V2DF_type_node = - build_vector_type_for_mode (aarch64_simd_double_type_node, V2DFmode); - - /* Unsigned integer types for various mode sizes. */ - intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); - intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); - intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); - intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); - - (*lang_hooks.types.register_builtin_type) (intUQI_type_node, - "__builtin_aarch64_simd_uqi"); - (*lang_hooks.types.register_builtin_type) (intUHI_type_node, - "__builtin_aarch64_simd_uhi"); - (*lang_hooks.types.register_builtin_type) (intUSI_type_node, - "__builtin_aarch64_simd_usi"); - (*lang_hooks.types.register_builtin_type) (intUDI_type_node, - "__builtin_aarch64_simd_udi"); - - /* Opaque integer types for structures of vectors. 
*/ - intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode)); - intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode)); - intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode)); - intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode)); - - (*lang_hooks.types.register_builtin_type) (intTI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intTI_type_node, "__builtin_aarch64_simd_ti"); - (*lang_hooks.types.register_builtin_type) (intEI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intEI_type_node, "__builtin_aarch64_simd_ei"); - (*lang_hooks.types.register_builtin_type) (intOI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intOI_type_node, "__builtin_aarch64_simd_oi"); - (*lang_hooks.types.register_builtin_type) (intCI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intCI_type_node, "__builtin_aarch64_simd_ci"); - (*lang_hooks.types.register_builtin_type) (intXI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intXI_type_node, "__builtin_aarch64_simd_xi"); - /* Pointers to vector types. */ - V8QI_pointer_node = build_pointer_type (V8QI_type_node); - V4HI_pointer_node = build_pointer_type (V4HI_type_node); - V2SI_pointer_node = build_pointer_type (V2SI_type_node); - V2SF_pointer_node = build_pointer_type (V2SF_type_node); - V16QI_pointer_node = build_pointer_type (V16QI_type_node); - V8HI_pointer_node = build_pointer_type (V8HI_type_node); - V4SI_pointer_node = build_pointer_type (V4SI_type_node); - V4SF_pointer_node = build_pointer_type (V4SF_type_node); - V2DI_pointer_node = build_pointer_type (V2DI_type_node); - V2DF_pointer_node = build_pointer_type (V2DF_type_node); - - /* Operations which return results as pairs. 
*/ - void_ftype_pv8qi_v8qi_v8qi = - build_function_type_list (void_type_node, V8QI_pointer_node, - V8QI_type_node, V8QI_type_node, NULL); - void_ftype_pv4hi_v4hi_v4hi = - build_function_type_list (void_type_node, V4HI_pointer_node, - V4HI_type_node, V4HI_type_node, NULL); - void_ftype_pv2si_v2si_v2si = - build_function_type_list (void_type_node, V2SI_pointer_node, - V2SI_type_node, V2SI_type_node, NULL); - void_ftype_pv2sf_v2sf_v2sf = - build_function_type_list (void_type_node, V2SF_pointer_node, - V2SF_type_node, V2SF_type_node, NULL); - void_ftype_pdi_di_di = - build_function_type_list (void_type_node, intDI_pointer_node, - aarch64_simd_intDI_type_node, - aarch64_simd_intDI_type_node, NULL); - void_ftype_pv16qi_v16qi_v16qi = - build_function_type_list (void_type_node, V16QI_pointer_node, - V16QI_type_node, V16QI_type_node, NULL); - void_ftype_pv8hi_v8hi_v8hi = - build_function_type_list (void_type_node, V8HI_pointer_node, - V8HI_type_node, V8HI_type_node, NULL); - void_ftype_pv4si_v4si_v4si = - build_function_type_list (void_type_node, V4SI_pointer_node, - V4SI_type_node, V4SI_type_node, NULL); - void_ftype_pv4sf_v4sf_v4sf = - build_function_type_list (void_type_node, V4SF_pointer_node, - V4SF_type_node, V4SF_type_node, NULL); - void_ftype_pv2di_v2di_v2di = - build_function_type_list (void_type_node, V2DI_pointer_node, - V2DI_type_node, V2DI_type_node, NULL); - void_ftype_pv2df_v2df_v2df = - build_function_type_list (void_type_node, V2DF_pointer_node, - V2DF_type_node, V2DF_type_node, NULL); - - dreg_types[0] = V8QI_type_node; - dreg_types[1] = V4HI_type_node; - dreg_types[2] = V2SI_type_node; - dreg_types[3] = V2SF_type_node; - dreg_types[4] = aarch64_simd_intDI_type_node; - dreg_types[5] = aarch64_simd_double_type_node; - - qreg_types[0] = V16QI_type_node; - qreg_types[1] = V8HI_type_node; - qreg_types[2] = V4SI_type_node; - qreg_types[3] = V4SF_type_node; - qreg_types[4] = V2DI_type_node; - qreg_types[5] = V2DF_type_node; - - /* If NUM_DREG_TYPES != NUM_QREG_TYPES, we will need separate nested loops - for qreg and dreg reinterp inits. */ - for (i = 0; i < NUM_DREG_TYPES; i++) - { - int j; - for (j = 0; j < NUM_DREG_TYPES; j++) - { - reinterp_ftype_dreg[i][j] - = build_function_type_list (dreg_types[i], dreg_types[j], NULL); - reinterp_ftype_qreg[i][j] - = build_function_type_list (qreg_types[i], qreg_types[j], NULL); - } - } + /* Unsigned integer types for various mode sizes. 
*/ + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUQI_type_node, + "__builtin_aarch64_simd_uqi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUHI_type_node, + "__builtin_aarch64_simd_uhi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUSI_type_node, + "__builtin_aarch64_simd_usi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUDI_type_node, + "__builtin_aarch64_simd_udi"); for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) { + bool print_type_signature_p = false; + char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; const char *const modenames[] = - { - "v8qi", "v4hi", "v2si", "v2sf", "di", "df", - "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", - "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" - }; + { + "v8qi", "v4hi", "v2si", "v2sf", "di", "df", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", + "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" + }; + const enum machine_mode modes[] = + { + V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode, + V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode, + V2DFmode, TImode, EImode, OImode, XImode, SImode, + SFmode, HImode, QImode + }; char namebuf[60]; tree ftype = NULL; tree fndecl = NULL; - int is_load = 0; - int is_store = 0; gcc_assert (ARRAY_SIZE (modenames) == T_MAX); d->fcode = fcode; - switch (d->itype) + /* We must track two variables here. op_num is + the operand number as in the RTL pattern. This is + required to access the mode (e.g. V4SF mode) of the + argument, from which the base type can be derived. + arg_num is an index in to the qualifiers data, which + gives qualifiers to the type (e.g. const unsigned). + The reason these two variables may differ by one is the + void return type. While all return types take the 0th entry + in the qualifiers array, there is no operand for them in the + RTL pattern. */ + int op_num = insn_data[d->code].n_operands - 1; + int arg_num = d->qualifiers[0] & qualifier_void + ? op_num + 1 + : op_num; + tree return_type = void_type_node, args = void_list_node; + tree eltype; + + /* Build a function type directly from the insn_data for this + builtin. The build_function_type () function takes care of + removing duplicates for us. */ + for (; op_num >= 0; arg_num--, op_num--) { - case AARCH64_SIMD_LOAD1: - case AARCH64_SIMD_LOAD1LANE: - case AARCH64_SIMD_LOADSTRUCT: - case AARCH64_SIMD_LOADSTRUCTLANE: - is_load = 1; - /* Fall through. */ - case AARCH64_SIMD_STORE1: - case AARCH64_SIMD_STORE1LANE: - case AARCH64_SIMD_STORESTRUCT: - case AARCH64_SIMD_STORESTRUCTLANE: - if (!is_load) - is_store = 1; - /* Fall through. 
*/ - case AARCH64_SIMD_UNOP: - case AARCH64_SIMD_BINOP: - case AARCH64_SIMD_TERNOP: - case AARCH64_SIMD_QUADOP: - case AARCH64_SIMD_COMBINE: - case AARCH64_SIMD_CONVERT: - case AARCH64_SIMD_CREATE: - case AARCH64_SIMD_DUP: - case AARCH64_SIMD_DUPLANE: - case AARCH64_SIMD_FIXCONV: - case AARCH64_SIMD_GETLANE: - case AARCH64_SIMD_LANEMAC: - case AARCH64_SIMD_LANEMUL: - case AARCH64_SIMD_LANEMULH: - case AARCH64_SIMD_LANEMULL: - case AARCH64_SIMD_LOGICBINOP: - case AARCH64_SIMD_SCALARMAC: - case AARCH64_SIMD_SCALARMUL: - case AARCH64_SIMD_SCALARMULH: - case AARCH64_SIMD_SCALARMULL: - case AARCH64_SIMD_SELECT: - case AARCH64_SIMD_SETLANE: - case AARCH64_SIMD_SHIFTACC: - case AARCH64_SIMD_SHIFTIMM: - case AARCH64_SIMD_SHIFTINSERT: - case AARCH64_SIMD_SPLIT: - case AARCH64_SIMD_VTBL: - case AARCH64_SIMD_VTBX: - { - int k; - tree return_type = void_type_node, args = void_list_node; - tree eltype; - /* Build a function type directly from the insn_data for this - builtin. The build_function_type () function takes care of - removing duplicates for us. */ - - for (k = insn_data[d->code].n_operands -1; k >= 0; k--) - { - /* Skip an internal operand for vget_{low, high}. */ - if (k == 2 && d->itype == AARCH64_SIMD_SPLIT) - continue; - - if (is_load && k == 1) - { - /* AdvSIMD load patterns always have the memory operand - (a DImode pointer) in the operand 1 position. We - want a const pointer to the element type in that - position. */ - gcc_assert (insn_data[d->code].operand[k].mode == DImode); - - switch (d->mode) - { - case T_V8QI: - case T_V16QI: - eltype = const_intQI_pointer_node; - break; - - case T_V4HI: - case T_V8HI: - eltype = const_intHI_pointer_node; - break; - - case T_V2SI: - case T_V4SI: - eltype = const_intSI_pointer_node; - break; - - case T_V2SF: - case T_V4SF: - eltype = const_float_pointer_node; - break; - - case T_DI: - case T_V2DI: - eltype = const_intDI_pointer_node; - break; - - case T_DF: - case T_V2DF: - eltype = const_double_pointer_node; - break; - - default: - gcc_unreachable (); - } - } - else if (is_store && k == 0) - { - /* Similarly, AdvSIMD store patterns use operand 0 as - the memory location to store to (a DImode pointer). - Use a pointer to the element type of the store in - that position. */ - gcc_assert (insn_data[d->code].operand[k].mode == DImode); - - switch (d->mode) - { - case T_V8QI: - case T_V16QI: - eltype = intQI_pointer_node; - break; - - case T_V4HI: - case T_V8HI: - eltype = intHI_pointer_node; - break; - - case T_V2SI: - case T_V4SI: - eltype = intSI_pointer_node; - break; - - case T_V2SF: - case T_V4SF: - eltype = float_pointer_node; - break; - - case T_DI: - case T_V2DI: - eltype = intDI_pointer_node; - break; - - case T_DF: - case T_V2DF: - eltype = double_pointer_node; - break; - - default: - gcc_unreachable (); - } - } - else - { - switch (insn_data[d->code].operand[k].mode) - { - case VOIDmode: - eltype = void_type_node; - break; - /* Scalars. 
*/ - case QImode: - eltype = aarch64_simd_intQI_type_node; - break; - case HImode: - eltype = aarch64_simd_intHI_type_node; - break; - case SImode: - eltype = aarch64_simd_intSI_type_node; - break; - case SFmode: - eltype = aarch64_simd_float_type_node; - break; - case DFmode: - eltype = aarch64_simd_double_type_node; - break; - case DImode: - eltype = aarch64_simd_intDI_type_node; - break; - case TImode: - eltype = intTI_type_node; - break; - case EImode: - eltype = intEI_type_node; - break; - case OImode: - eltype = intOI_type_node; - break; - case CImode: - eltype = intCI_type_node; - break; - case XImode: - eltype = intXI_type_node; - break; - /* 64-bit vectors. */ - case V8QImode: - eltype = V8QI_type_node; - break; - case V4HImode: - eltype = V4HI_type_node; - break; - case V2SImode: - eltype = V2SI_type_node; - break; - case V2SFmode: - eltype = V2SF_type_node; - break; - /* 128-bit vectors. */ - case V16QImode: - eltype = V16QI_type_node; - break; - case V8HImode: - eltype = V8HI_type_node; - break; - case V4SImode: - eltype = V4SI_type_node; - break; - case V4SFmode: - eltype = V4SF_type_node; - break; - case V2DImode: - eltype = V2DI_type_node; - break; - case V2DFmode: - eltype = V2DF_type_node; - break; - default: - gcc_unreachable (); - } - } - - if (k == 0 && !is_store) - return_type = eltype; - else - args = tree_cons (NULL_TREE, eltype, args); - } - ftype = build_function_type (return_type, args); - } - break; + enum machine_mode op_mode = insn_data[d->code].operand[op_num].mode; + enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num]; - case AARCH64_SIMD_RESULTPAIR: - { - switch (insn_data[d->code].operand[1].mode) - { - case V8QImode: - ftype = void_ftype_pv8qi_v8qi_v8qi; - break; - case V4HImode: - ftype = void_ftype_pv4hi_v4hi_v4hi; - break; - case V2SImode: - ftype = void_ftype_pv2si_v2si_v2si; - break; - case V2SFmode: - ftype = void_ftype_pv2sf_v2sf_v2sf; - break; - case DImode: - ftype = void_ftype_pdi_di_di; - break; - case V16QImode: - ftype = void_ftype_pv16qi_v16qi_v16qi; - break; - case V8HImode: - ftype = void_ftype_pv8hi_v8hi_v8hi; - break; - case V4SImode: - ftype = void_ftype_pv4si_v4si_v4si; - break; - case V4SFmode: - ftype = void_ftype_pv4sf_v4sf_v4sf; - break; - case V2DImode: - ftype = void_ftype_pv2di_v2di_v2di; - break; - case V2DFmode: - ftype = void_ftype_pv2df_v2df_v2df; - break; - default: - gcc_unreachable (); - } - } - break; + if (qualifiers & qualifier_unsigned) + { + type_signature[arg_num] = 'u'; + print_type_signature_p = true; + } + else + type_signature[arg_num] = 's'; + + /* Skip an internal operand for vget_{low, high}. */ + if (qualifiers & qualifier_internal) + continue; + + /* Some builtins have different user-facing types + for certain arguments, encoded in d->mode. */ + if (qualifiers & qualifier_map_mode) + op_mode = modes[d->mode]; + + /* For pointers, we want a pointer to the basic type + of the vector. */ + if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) + op_mode = GET_MODE_INNER (op_mode); + + eltype = aarch64_build_type (op_mode, + qualifiers & qualifier_unsigned); + + /* Add qualifiers. */ + if (qualifiers & qualifier_const) + eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); + + if (qualifiers & qualifier_pointer) + eltype = build_pointer_type (eltype); + + /* If we have reached arg_num == 0, we are at a non-void + return type. Otherwise, we are still processing + arguments. 
*/ + if (arg_num == 0) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } - case AARCH64_SIMD_REINTERP: - { - /* We iterate over 6 doubleword types, then 6 quadword - types. */ - int rhs_d = d->mode % NUM_DREG_TYPES; - int rhs_q = (d->mode - NUM_DREG_TYPES) % NUM_QREG_TYPES; - switch (insn_data[d->code].operand[0].mode) - { - case V8QImode: - ftype = reinterp_ftype_dreg[0][rhs_d]; - break; - case V4HImode: - ftype = reinterp_ftype_dreg[1][rhs_d]; - break; - case V2SImode: - ftype = reinterp_ftype_dreg[2][rhs_d]; - break; - case V2SFmode: - ftype = reinterp_ftype_dreg[3][rhs_d]; - break; - case DImode: - ftype = reinterp_ftype_dreg[4][rhs_d]; - break; - case DFmode: - ftype = reinterp_ftype_dreg[5][rhs_d]; - break; - case V16QImode: - ftype = reinterp_ftype_qreg[0][rhs_q]; - break; - case V8HImode: - ftype = reinterp_ftype_qreg[1][rhs_q]; - break; - case V4SImode: - ftype = reinterp_ftype_qreg[2][rhs_q]; - break; - case V4SFmode: - ftype = reinterp_ftype_qreg[3][rhs_q]; - break; - case V2DImode: - ftype = reinterp_ftype_qreg[4][rhs_q]; - break; - case V2DFmode: - ftype = reinterp_ftype_qreg[5][rhs_q]; - break; - default: - gcc_unreachable (); - } - } - break; + ftype = build_function_type (return_type, args); - default: - gcc_unreachable (); - } gcc_assert (ftype != NULL); - snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", - d->name, modenames[d->mode]); + if (print_type_signature_p) + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s", + d->name, modenames[d->mode], type_signature); + else + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", + d->name, modenames[d->mode]); fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, NULL_TREE); @@ -952,8 +668,6 @@ typedef enum SIMD_ARG_STOP } builtin_simd_arg; -#define SIMD_MAX_BUILTIN_ARGS 5 - static rtx aarch64_simd_expand_args (rtx target, int icode, int have_retval, tree exp, ...) @@ -1081,99 +795,58 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) { aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[fcode - (AARCH64_SIMD_BUILTIN_BASE + 1)]; - aarch64_simd_itype itype = d->itype; enum insn_code icode = d->code; + builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS]; + int num_args = insn_data[d->code].n_operands; + int is_void = 0; + int k; - switch (itype) - { - case AARCH64_SIMD_UNOP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); + is_void = !!(d->qualifiers[0] & qualifier_void); - case AARCH64_SIMD_BINOP: - { - rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1)); - /* Handle constants only if the predicate allows it. */ - bool op1_const_int_p = - (CONST_INT_P (arg2) - && (*insn_data[icode].operand[2].predicate) - (arg2, insn_data[icode].operand[2].mode)); - return aarch64_simd_expand_args - (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - op1_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); - } + num_args += is_void; + + for (k = 1; k < num_args; k++) + { + /* We have four arrays of data, each indexed in a different fashion. + qualifiers - element 0 always describes the function return type. + operands - element 0 is either the operand for return value (if + the function has a non-void return type) or the operand for the + first argument. + expr_args - element 0 always holds the first argument. + args - element 0 is always used for the return type. 
*/ + int qualifiers_k = k; + int operands_k = k - is_void; + int expr_args_k = k - 1; + + if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = SIMD_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) + { + rtx arg + = expand_normal (CALL_EXPR_ARG (exp, + (expr_args_k))); + /* Handle constants only if the predicate allows it. */ + bool op_const_int_p = + (CONST_INT_P (arg) + && (*insn_data[icode].operand[operands_k].predicate) + (arg, insn_data[icode].operand[operands_k].mode)); + args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG; + } + else + args[k] = SIMD_ARG_COPY_TO_REG; - case AARCH64_SIMD_TERNOP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); - - case AARCH64_SIMD_QUADOP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); - case AARCH64_SIMD_LOAD1: - case AARCH64_SIMD_LOADSTRUCT: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_STORE1: - case AARCH64_SIMD_STORESTRUCT: - return aarch64_simd_expand_args (target, icode, 0, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_REINTERP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_CREATE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_COMBINE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_GETLANE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - case AARCH64_SIMD_SETLANE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - case AARCH64_SIMD_SHIFTIMM: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - case AARCH64_SIMD_SHIFTACC: - case AARCH64_SIMD_SHIFTINSERT: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - default: - gcc_unreachable (); } + args[k] = SIMD_ARG_STOP; + + /* The interface to aarch64_simd_expand_args expects a 0 if + the function is void, and a 1 if it is not. */ + return aarch64_simd_expand_args + (target, icode, !is_void, exp, + args[1], + args[2], + args[3], + args[4], + SIMD_ARG_STOP); } /* Expand an expression EXP that calls a built-in function, diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index c840aa016e8..51c1ff803b0 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -35,6 +35,4 @@ therefore serves as a template for adding more CPUs in the future. 
*/ AARCH64_CORE("cortex-a53", cortexa53, 8, AARCH64_FL_FPSIMD, generic) -AARCH64_CORE("cortex-a57", cortexa57, 8, AARCH64_FL_FPSIMD, generic) -AARCH64_CORE("example-1", large, 8, AARCH64_FL_FPSIMD, generic) -AARCH64_CORE("example-2", small, 8, AARCH64_FL_FPSIMD, generic) +AARCH64_CORE("cortex-a57", cortexa15, 8, AARCH64_FL_FPSIMD, generic) diff --git a/gcc/config/aarch64/aarch64-generic.md b/gcc/config/aarch64/aarch64-generic.md deleted file mode 100644 index 12faac84348..00000000000 --- a/gcc/config/aarch64/aarch64-generic.md +++ /dev/null @@ -1,40 +0,0 @@ -;; Machine description for AArch64 architecture. -;; Copyright (C) 2009-2013 Free Software Foundation, Inc. -;; Contributed by ARM Ltd. -;; -;; This file is part of GCC. -;; -;; GCC is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 3, or (at your option) -;; any later version. -;; -;; GCC is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;; General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with GCC; see the file COPYING3. If not see -;; <http://www.gnu.org/licenses/>. - -;; Generic scheduler - -(define_automaton "aarch64") - -(define_cpu_unit "core" "aarch64") - -(define_attr "is_load" "yes,no" - (if_then_else (eq_attr "v8type" "fpsimd_load,fpsimd_load2,load1,load2") - (const_string "yes") - (const_string "no"))) - -(define_insn_reservation "load" 2 - (and (eq_attr "generic_sched" "yes") - (eq_attr "is_load" "yes")) - "core") - -(define_insn_reservation "nonload" 1 - (and (eq_attr "generic_sched" "yes") - (eq_attr "is_load" "no")) - "core") diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index f19045d0792..489fd1cd7b5 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -108,25 +108,6 @@ enum aarch64_symbol_type cost models and vectors for address cost calculations, register move costs and memory move costs. */ -/* Extra costs for specific insns. Only records the cost above a - single insn. */ - -struct cpu_rtx_cost_table -{ - const int memory_load; - const int memory_store; - const int register_shift; - const int int_divide; - const int float_divide; - const int double_divide; - const int int_multiply; - const int int_multiply_extend; - const int int_multiply_add; - const int int_multiply_extend_add; - const int float_multiply; - const int double_multiply; -}; - /* Additional cost for addresses. */ struct cpu_addrcost_table { @@ -170,7 +151,7 @@ struct cpu_vector_cost struct tune_params { - const struct cpu_rtx_cost_table *const insn_extra_cost; + const struct cpu_cost_table *const insn_extra_cost; const struct cpu_addrcost_table *const addr_cost; const struct cpu_regmove_cost *const regmove_cost; const struct cpu_vector_cost *const vec_costs; diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index a747ee8d7ba..b9ebdf54431 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -18,226 +18,6 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. 
- -; Main data types used by the insntructions - -(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI" - (const_string "unknown")) - - -; Classification of AdvSIMD instructions for scheduling purposes. -; Do not set this attribute and the "v8type" attribute together in -; any instruction pattern. - -; simd_abd integer absolute difference and accumulate. -; simd_abdl integer absolute difference and accumulate (long). -; simd_adal integer add and accumulate (long). -; simd_add integer addition/subtraction. -; simd_addl integer addition/subtraction (long). -; simd_addlv across lanes integer sum (long). -; simd_addn integer addition/subtraction (narrow). -; simd_addn2 integer addition/subtraction (narrow, high). -; simd_addv across lanes integer sum. -; simd_cls count leading sign/zero bits. -; simd_cmp compare / create mask. -; simd_cnt population count. -; simd_dup duplicate element. -; simd_dupgp duplicate general purpose register. -; simd_ext bitwise extract from pair. -; simd_fabd floating point absolute difference. -; simd_fadd floating point add/sub. -; simd_fcmp floating point compare. -; simd_fcvti floating point convert to integer. -; simd_fcvtl floating-point convert upsize. -; simd_fcvtn floating-point convert downsize (narrow). -; simd_fcvtn2 floating-point convert downsize (narrow, high). -; simd_fdiv floating point division. -; simd_fminmax floating point min/max. -; simd_fminmaxv across lanes floating point min/max. -; simd_fmla floating point multiply-add. -; simd_fmla_elt floating point multiply-add (by element). -; simd_fmul floating point multiply. -; simd_fmul_elt floating point multiply (by element). -; simd_fnegabs floating point neg/abs. -; simd_frecpe floating point reciprocal estimate. -; simd_frecps floating point reciprocal step. -; simd_frecpx floating point reciprocal exponent. -; simd_frint floating point round to integer. -; simd_fsqrt floating point square root. -; simd_icvtf integer convert to floating point. -; simd_ins insert element. -; simd_insgp insert general purpose register. -; simd_load1 load multiple structures to one register (LD1). -; simd_load1r load single structure to all lanes of one register (LD1R). -; simd_load1s load single structure to one lane of one register (LD1 [index]). -; simd_load2 load multiple structures to two registers (LD1, LD2). -; simd_load2r load single structure to all lanes of two registers (LD1R, LD2R). -; simd_load2s load single structure to one lane of two registers (LD2 [index]). -; simd_load3 load multiple structures to three registers (LD1, LD3). -; simd_load3r load single structure to all lanes of three registers (LD3R). -; simd_load3s load single structure to one lane of three registers (LD3 [index]). -; simd_load4 load multiple structures to four registers (LD1, LD2, LD4). -; simd_load4r load single structure to all lanes of four registers (LD4R). -; simd_load4s load single structure to one lane of four registers (LD4 [index]). -; simd_logic logical operation. -; simd_logic_imm logcial operation (immediate). -; simd_minmax integer min/max. -; simd_minmaxv across lanes integer min/max, -; simd_mla integer multiply-accumulate. -; simd_mla_elt integer multiply-accumulate (by element). -; simd_mlal integer multiply-accumulate (long). -; simd_mlal_elt integer multiply-accumulate (by element, long). -; simd_move move register. -; simd_move_imm move immediate. -; simd_movgp move element to general purpose register. -; simd_mul integer multiply. 
-; simd_mul_elt integer multiply (by element). -; simd_mull integer multiply (long). -; simd_mull_elt integer multiply (by element, long). -; simd_negabs integer negate/absolute. -; simd_rbit bitwise reverse. -; simd_rcpe integer reciprocal estimate. -; simd_rcps integer reciprocal square root. -; simd_rev element reverse. -; simd_sat_add integer saturating addition/subtraction. -; simd_sat_mlal integer saturating multiply-accumulate (long). -; simd_sat_mlal_elt integer saturating multiply-accumulate (by element, long). -; simd_sat_mul integer saturating multiply. -; simd_sat_mul_elt integer saturating multiply (by element). -; simd_sat_mull integer saturating multiply (long). -; simd_sat_mull_elt integer saturating multiply (by element, long). -; simd_sat_negabs integer saturating negate/absolute. -; simd_sat_shift integer saturating shift. -; simd_sat_shift_imm integer saturating shift (immediate). -; simd_sat_shiftn_imm integer saturating shift (narrow, immediate). -; simd_sat_shiftn2_imm integer saturating shift (narrow, high, immediate). -; simd_shift shift register/vector. -; simd_shift_acc shift accumulate. -; simd_shift_imm shift immediate. -; simd_shift_imm_acc shift immediate and accumualte. -; simd_shiftl shift register/vector (long). -; simd_shiftl_imm shift register/vector (long, immediate). -; simd_shiftn_imm shift register/vector (narrow, immediate). -; simd_shiftn2_imm shift register/vector (narrow, high, immediate). -; simd_store1 store multiple structures from one register (ST1). -; simd_store1s store single structure from one lane of one register (ST1 [index]). -; simd_store2 store multiple structures from two registers (ST1, ST2). -; simd_store2s store single structure from one lane of two registers (ST2 [index]). -; simd_store3 store multiple structures from three registers (ST1, ST3). -; simd_store3s store single structure from one lane of three register (ST3 [index]). -; simd_store4 store multiple structures from four registers (ST1, ST2, ST4). -; simd_store4s store single structure from one lane for four registers (ST4 [index]). -; simd_tbl table lookup. -; simd_trn transpose. -; simd_uzp unzip. -; simd_zip zip. 
- -(define_attr "simd_type" - "simd_abd,\ - simd_abdl,\ - simd_adal,\ - simd_add,\ - simd_addl,\ - simd_addlv,\ - simd_addn,\ - simd_addn2,\ - simd_addv,\ - simd_cls,\ - simd_cmp,\ - simd_cnt,\ - simd_dup,\ - simd_dupgp,\ - simd_ext,\ - simd_fabd,\ - simd_fadd,\ - simd_fcmp,\ - simd_fcvti,\ - simd_fcvtl,\ - simd_fcvtn,\ - simd_fcvtn2,\ - simd_fdiv,\ - simd_fminmax,\ - simd_fminmaxv,\ - simd_fmla,\ - simd_fmla_elt,\ - simd_fmul,\ - simd_fmul_elt,\ - simd_fnegabs,\ - simd_frecpe,\ - simd_frecps,\ - simd_frecpx,\ - simd_frint,\ - simd_fsqrt,\ - simd_icvtf,\ - simd_ins,\ - simd_insgp,\ - simd_load1,\ - simd_load1r,\ - simd_load1s,\ - simd_load2,\ - simd_load2r,\ - simd_load2s,\ - simd_load3,\ - simd_load3r,\ - simd_load3s,\ - simd_load4,\ - simd_load4r,\ - simd_load4s,\ - simd_logic,\ - simd_logic_imm,\ - simd_minmax,\ - simd_minmaxv,\ - simd_mla,\ - simd_mla_elt,\ - simd_mlal,\ - simd_mlal_elt,\ - simd_movgp,\ - simd_move,\ - simd_move_imm,\ - simd_mul,\ - simd_mul_elt,\ - simd_mull,\ - simd_mull_elt,\ - simd_negabs,\ - simd_rbit,\ - simd_rcpe,\ - simd_rcps,\ - simd_rev,\ - simd_sat_add,\ - simd_sat_mlal,\ - simd_sat_mlal_elt,\ - simd_sat_mul,\ - simd_sat_mul_elt,\ - simd_sat_mull,\ - simd_sat_mull_elt,\ - simd_sat_negabs,\ - simd_sat_shift,\ - simd_sat_shift_imm,\ - simd_sat_shiftn_imm,\ - simd_sat_shiftn2_imm,\ - simd_shift,\ - simd_shift_acc,\ - simd_shift_imm,\ - simd_shift_imm_acc,\ - simd_shiftl,\ - simd_shiftl_imm,\ - simd_shiftn_imm,\ - simd_shiftn2_imm,\ - simd_store1,\ - simd_store1s,\ - simd_store2,\ - simd_store2s,\ - simd_store3,\ - simd_store3s,\ - simd_store4,\ - simd_store4s,\ - simd_tbl,\ - simd_trn,\ - simd_uzp,\ - simd_zip,\ - none" - (const_string "none")) - (define_expand "mov<mode>" [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] @@ -268,9 +48,7 @@ "@ dup\\t%0.<Vtype>, %<vw>1 dup\\t%0.<Vtype>, %1.<Vetype>[0]" - [(set_attr "simd_type" "simd_dupgp, simd_dup") - (set_attr "type" "neon_from_gp<q>, neon_dup<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")] ) (define_insn "aarch64_simd_dup<mode>" @@ -278,9 +56,7 @@ (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))] "TARGET_SIMD" "dup\\t%0.<Vtype>, %1.<Vetype>[0]" - [(set_attr "simd_type" "simd_dup") - (set_attr "type" "neon_dup<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_dup<q>")] ) (define_insn "aarch64_dup_lane<mode>" @@ -292,9 +68,7 @@ )))] "TARGET_SIMD" "dup\\t%0.<Vtype>, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_dup") - (set_attr "type" "neon_dup<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_dup<q>")] ) (define_insn "aarch64_dup_lane_<vswap_width_name><mode>" @@ -306,9 +80,7 @@ )))] "TARGET_SIMD" "dup\\t%0.<Vtype>, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_dup") - (set_attr "type" "neon_dup<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_dup<q>")] ) (define_insn "*aarch64_simd_mov<mode>" @@ -334,11 +106,9 @@ default: gcc_unreachable (); } } - [(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm") - (set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ + [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\ - mov_reg, neon_move<q>") - (set_attr "simd_mode" "<MODE>")] + mov_reg, neon_move<q>")] ) (define_insn "*aarch64_simd_mov<mode>" @@ -368,11 +138,9 @@ gcc_unreachable (); } } - [(set_attr "simd_type" 
"simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm") - (set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ + [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ neon_logic<q>, multiple, multiple, multiple,\ neon_move<q>") - (set_attr "simd_mode" "<MODE>") (set_attr "length" "4,4,4,8,8,8,4")] ) @@ -451,9 +219,7 @@ (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] "TARGET_SIMD && reload_completed" "umov\t%0, %1.d[0]" - [(set_attr "simd_type" "simd_movgp") - (set_attr "type" "neon_to_gp<q>") - (set_attr "simd_mode" "<MODE>") + [(set_attr "type" "neon_to_gp<q>") (set_attr "length" "4") ]) @@ -464,9 +230,7 @@ (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] "TARGET_SIMD && reload_completed" "umov\t%0, %1.d[1]" - [(set_attr "simd_type" "simd_movgp") - (set_attr "type" "neon_to_gp<q>") - (set_attr "simd_mode" "<MODE>") + [(set_attr "type" "neon_to_gp<q>") (set_attr "length" "4") ]) @@ -476,9 +240,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" - [(set_attr "simd_type" "simd_logic") - (set_attr "type" "neon_logic<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_logic<q>")] ) (define_insn "bic<mode>3" @@ -487,9 +249,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" - [(set_attr "simd_type" "simd_logic") - (set_attr "type" "neon_logic<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_logic<q>")] ) (define_insn "add<mode>3" @@ -498,9 +258,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_add") - (set_attr "type" "neon_add<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_add<q>")] ) (define_insn "sub<mode>3" @@ -509,9 +267,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_add") - (set_attr "type" "neon_sub<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sub<q>")] ) (define_insn "mul<mode>3" @@ -520,9 +276,7 @@ (match_operand:VDQM 2 "register_operand" "w")))] "TARGET_SIMD" "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_mul") - (set_attr "type" "neon_mul_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mul_<Vetype><q>")] ) (define_insn "*aarch64_mul3_elt<mode>" @@ -535,9 +289,7 @@ (match_operand:VMUL 3 "register_operand" "w")))] "TARGET_SIMD" "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_<f>mul_elt") - (set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>" @@ -550,9 +302,7 @@ (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] "TARGET_SIMD" "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_<f>mul_elt") - (set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_mul3_elt_to_128df" @@ -563,9 +313,7 @@ (match_operand:V2DF 1 "register_operand" "w")))] "TARGET_SIMD" "fmul\\t%0.2d, %1.2d, %2.d[0]" - [(set_attr "simd_type" "simd_fmul_elt") - (set_attr "type" "neon_fp_mul_d_scalar_q") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_mul_d_scalar_q")] ) (define_insn "*aarch64_mul3_elt_to_64v2df" @@ 
-577,9 +325,7 @@ (match_operand:DF 3 "register_operand" "w")))] "TARGET_SIMD" "fmul\\t%0.2d, %3.2d, %1.d[%2]" - [(set_attr "simd_type" "simd_fmul_elt") - (set_attr "type" "neon_fp_mul_d_scalar_q") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_mul_d_scalar_q")] ) (define_insn "neg<mode>2" @@ -587,9 +333,7 @@ (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))] "TARGET_SIMD" "neg\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_negabs") - (set_attr "type" "neon_neg<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_neg<q>")] ) (define_insn "abs<mode>2" @@ -597,9 +341,7 @@ (abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))] "TARGET_SIMD" "abs\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_negabs") - (set_attr "type" "neon_abs<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_abs<q>")] ) (define_insn "abd<mode>_3" @@ -609,9 +351,7 @@ (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] "TARGET_SIMD" "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_abd") - (set_attr "type" "neon_abd<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_abd<q>")] ) (define_insn "aba<mode>_3" @@ -622,9 +362,7 @@ (match_operand:VDQ_BHSI 3 "register_operand" "0")))] "TARGET_SIMD" "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_abd") - (set_attr "type" "neon_arith_acc<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_arith_acc<q>")] ) (define_insn "fabd<mode>_3" @@ -634,9 +372,7 @@ (match_operand:VDQF 2 "register_operand" "w"))))] "TARGET_SIMD" "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fabd") - (set_attr "type" "neon_fp_abd_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_abd_<Vetype><q>")] ) (define_insn "*fabd_scalar<mode>3" @@ -646,9 +382,7 @@ (match_operand:GPF 2 "register_operand" "w"))))] "TARGET_SIMD" "fabd\t%<s>0, %<s>1, %<s>2" - [(set_attr "simd_type" "simd_fabd") - (set_attr "type" "neon_fp_abd_<Vetype><q>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "neon_fp_abd_<Vetype><q>")] ) (define_insn "and<mode>3" @@ -657,9 +391,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" - [(set_attr "simd_type" "simd_logic") - (set_attr "type" "neon_logic<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_logic<q>")] ) (define_insn "ior<mode>3" @@ -668,9 +400,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" - [(set_attr "simd_type" "simd_logic") - (set_attr "type" "neon_logic<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_logic<q>")] ) (define_insn "xor<mode>3" @@ -679,9 +409,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" - [(set_attr "simd_type" "simd_logic") - (set_attr "type" "neon_logic<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_logic<q>")] ) (define_insn "one_cmpl<mode>2" @@ -689,23 +417,21 @@ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")))] "TARGET_SIMD" "not\t%0.<Vbtype>, %1.<Vbtype>" - [(set_attr "simd_type" "simd_logic") - (set_attr "type" "neon_logic<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_logic<q>")] ) (define_insn "aarch64_simd_vec_set<mode>" - [(set (match_operand:VQ_S 0 "register_operand" "=w") + [(set (match_operand:VQ_S 0 "register_operand" "=w,w") (vec_merge:VQ_S (vec_duplicate:VQ_S - (match_operand:<VEL> 1 
"register_operand" "r")) - (match_operand:VQ_S 3 "register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")))] + (match_operand:<VEL> 1 "register_operand" "r,w")) + (match_operand:VQ_S 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_SIMD" - "ins\t%0.<Vetype>[%p2], %w1"; - [(set_attr "simd_type" "simd_insgp") - (set_attr "type" "neon_from_gp<q>") - (set_attr "simd_mode" "<MODE>")] + "@ + ins\t%0.<Vetype>[%p2], %w1 + ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]" + [(set_attr "type" "neon_from_gp<q>, neon_ins<q>")] ) (define_insn "aarch64_simd_lshr<mode>" @@ -714,9 +440,7 @@ (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] "TARGET_SIMD" "ushr\t%0.<Vtype>, %1.<Vtype>, %2" - [(set_attr "simd_type" "simd_shift_imm") - (set_attr "type" "neon_shift_imm<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm<q>")] ) (define_insn "aarch64_simd_ashr<mode>" @@ -725,9 +449,7 @@ (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] "TARGET_SIMD" "sshr\t%0.<Vtype>, %1.<Vtype>, %2" - [(set_attr "simd_type" "simd_shift_imm") - (set_attr "type" "neon_shift_imm<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm<q>")] ) (define_insn "aarch64_simd_imm_shl<mode>" @@ -736,9 +458,7 @@ (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))] "TARGET_SIMD" "shl\t%0.<Vtype>, %1.<Vtype>, %2" - [(set_attr "simd_type" "simd_shift_imm") - (set_attr "type" "neon_shift_imm<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm<q>")] ) (define_insn "aarch64_simd_reg_sshl<mode>" @@ -747,9 +467,7 @@ (match_operand:VDQ 2 "register_operand" "w")))] "TARGET_SIMD" "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_shift") - (set_attr "type" "neon_shift_reg<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_reg<q>")] ) (define_insn "aarch64_simd_reg_shl<mode>_unsigned" @@ -759,9 +477,7 @@ UNSPEC_ASHIFT_UNSIGNED))] "TARGET_SIMD" "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_shift") - (set_attr "type" "neon_shift_reg<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_reg<q>")] ) (define_insn "aarch64_simd_reg_shl<mode>_signed" @@ -771,9 +487,7 @@ UNSPEC_ASHIFT_SIGNED))] "TARGET_SIMD" "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_shift") - (set_attr "type" "neon_shift_reg<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_reg<q>")] ) (define_expand "ashl<mode>3" @@ -958,9 +672,9 @@ }) (define_expand "vec_set<mode>" - [(match_operand:VQ_S 0 "register_operand" "+w") - (match_operand:<VEL> 1 "register_operand" "r") - (match_operand:SI 2 "immediate_operand" "")] + [(match_operand:VQ_S 0 "register_operand") + (match_operand:<VEL> 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); @@ -971,23 +685,23 @@ ) (define_insn "aarch64_simd_vec_setv2di" - [(set (match_operand:V2DI 0 "register_operand" "=w") + [(set (match_operand:V2DI 0 "register_operand" "=w,w") (vec_merge:V2DI (vec_duplicate:V2DI - (match_operand:DI 1 "register_operand" "r")) - (match_operand:V2DI 3 "register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")))] + (match_operand:DI 1 "register_operand" "r,w")) + (match_operand:V2DI 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_SIMD" - "ins\t%0.d[%p2], %1"; - [(set_attr "simd_type" "simd_insgp") - (set_attr "type" "neon_from_gp") 
- (set_attr "simd_mode" "V2DI")] + "@ + ins\t%0.d[%p2], %1 + ins\\t%0.d[%p2], %1.d[0]" + [(set_attr "type" "neon_from_gp, neon_ins_q")] ) (define_expand "vec_setv2di" - [(match_operand:V2DI 0 "register_operand" "+w") - (match_operand:DI 1 "register_operand" "r") - (match_operand:SI 2 "immediate_operand" "")] + [(match_operand:V2DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); @@ -1006,9 +720,7 @@ (match_operand:SI 2 "immediate_operand" "i")))] "TARGET_SIMD" "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; - [(set_attr "simd_type" "simd_ins") - (set_attr "type" "neon_ins<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_ins<q>")] ) (define_expand "vec_set<mode>" @@ -1032,9 +744,7 @@ (match_operand:VQ_S 1 "register_operand" "0")))] "TARGET_SIMD" "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" - [(set_attr "simd_type" "simd_mla") - (set_attr "type" "neon_mla_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype><q>")] ) (define_insn "*aarch64_mla_elt<mode>" @@ -1049,9 +759,7 @@ (match_operand:VDQHS 4 "register_operand" "0")))] "TARGET_SIMD" "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_mla") - (set_attr "type" "neon_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>" @@ -1066,9 +774,7 @@ (match_operand:VDQHS 4 "register_operand" "0")))] "TARGET_SIMD" "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_mla") - (set_attr "type" "neon_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) (define_insn "aarch64_mls<mode>" @@ -1078,9 +784,7 @@ (match_operand:VQ_S 3 "register_operand" "w"))))] "TARGET_SIMD" "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" - [(set_attr "simd_type" "simd_mla") - (set_attr "type" "neon_mla_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype><q>")] ) (define_insn "*aarch64_mls_elt<mode>" @@ -1095,9 +799,7 @@ (match_operand:VDQHS 3 "register_operand" "w"))))] "TARGET_SIMD" "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_mla") - (set_attr "type" "neon_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>" @@ -1112,9 +814,7 @@ (match_operand:VDQHS 3 "register_operand" "w"))))] "TARGET_SIMD" "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_mla") - (set_attr "type" "neon_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) ;; Max/Min operations. @@ -1124,9 +824,7 @@ (match_operand:VQ_S 2 "register_operand" "w")))] "TARGET_SIMD" "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_minmax") - (set_attr "type" "neon_minmax<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_minmax<q>")] ) ;; Move into low-half clearing high half to 0. 
@@ -1141,10 +839,7 @@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 dup\\t%d0, %1" - [(set_attr "v8type" "*,fmov,*") - (set_attr "type" "neon_dup<q>,fmov,neon_dup<q>") - (set_attr "simd_type" "simd_dup,*,simd_dup") - (set_attr "simd_mode" "<MODE>") + [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>") (set_attr "simd" "yes,*,yes") (set_attr "fp" "*,yes,*") (set_attr "length" "4")] @@ -1163,9 +858,7 @@ "@ ins\\t%0.d[1], %1.d[0] ins\\t%0.d[1], %1" - [(set_attr "simd_type" "simd_ins,simd_ins") - (set_attr "type" "neon_ins") - (set_attr "simd_mode" "<MODE>") + [(set_attr "type" "neon_ins") (set_attr "length" "4")] ) @@ -1188,9 +881,7 @@ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] "TARGET_SIMD" "xtn\\t%0.<Vntype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_shiftn_imm") - (set_attr "type" "neon_shift_imm_narrow_q") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm_narrow_q")] ) (define_expand "vec_pack_trunc_<mode>" @@ -1216,9 +907,7 @@ (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] "TARGET_SIMD" "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_shiftn2_imm") - (set_attr "type" "multiple") - (set_attr "simd_mode" "<MODE>") + [(set_attr "type" "multiple") (set_attr "length" "8")] ) @@ -1232,9 +921,7 @@ )))] "TARGET_SIMD" "<su>shll %0.<Vwtype>, %1.<Vhalftype>, 0" - [(set_attr "simd_type" "simd_shiftl_imm") - (set_attr "type" "neon_shift_imm_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm_long")] ) (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" @@ -1245,9 +932,7 @@ )))] "TARGET_SIMD" "<su>shll2 %0.<Vwtype>, %1.<Vtype>, 0" - [(set_attr "simd_type" "simd_shiftl_imm") - (set_attr "type" "neon_shift_imm_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm_long")] ) (define_expand "vec_unpack<su>_hi_<mode>" @@ -1289,9 +974,7 @@ (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" - [(set_attr "simd_type" "simd_mlal") - (set_attr "type" "neon_mla_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_long")] ) (define_insn "*aarch64_<su>mlal_hi<mode>" @@ -1307,9 +990,7 @@ (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" - [(set_attr "simd_type" "simd_mlal") - (set_attr "type" "neon_mla_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_long")] ) (define_insn "*aarch64_<su>mlsl_lo<mode>" @@ -1325,9 +1006,7 @@ (match_dup 3))))))] "TARGET_SIMD" "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" - [(set_attr "simd_type" "simd_mlal") - (set_attr "type" "neon_mla_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_long")] ) (define_insn "*aarch64_<su>mlsl_hi<mode>" @@ -1343,9 +1022,7 @@ (match_dup 3))))))] "TARGET_SIMD" "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" - [(set_attr "simd_type" "simd_mlal") - (set_attr "type" "neon_mla_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_long")] ) (define_insn "*aarch64_<su>mlal<mode>" @@ -1359,9 +1036,7 @@ (match_operand:<VWIDE> 3 "register_operand" "0")))] "TARGET_SIMD" "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_mlal") - (set_attr "type" "neon_mla_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_long")] ) (define_insn 
"*aarch64_<su>mlsl<mode>" @@ -1375,9 +1050,7 @@ (match_operand:VDW 3 "register_operand" "w")))))] "TARGET_SIMD" "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" - [(set_attr "simd_type" "simd_mlal") - (set_attr "type" "neon_mla_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mla_<Vetype>_long")] ) (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" @@ -1390,9 +1063,7 @@ (match_dup 3)))))] "TARGET_SIMD" "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" - [(set_attr "simd_type" "simd_mull") - (set_attr "type" "neon_mul_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mul_<Vetype>_long")] ) (define_expand "vec_widen_<su>mult_lo_<mode>" @@ -1419,9 +1090,7 @@ (match_dup 3)))))] "TARGET_SIMD" "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_mull") - (set_attr "type" "neon_mul_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mul_<Vetype>_long")] ) (define_expand "vec_widen_<su>mult_hi_<mode>" @@ -1470,9 +1139,7 @@ (match_operand:VDQF 2 "register_operand" "w")))] "TARGET_SIMD" "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fadd") - (set_attr "type" "neon_fp_addsub_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_addsub_<Vetype><q>")] ) (define_insn "sub<mode>3" @@ -1481,9 +1148,7 @@ (match_operand:VDQF 2 "register_operand" "w")))] "TARGET_SIMD" "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fadd") - (set_attr "type" "neon_fp_addsub_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_addsub_<Vetype><q>")] ) (define_insn "mul<mode>3" @@ -1492,9 +1157,7 @@ (match_operand:VDQF 2 "register_operand" "w")))] "TARGET_SIMD" "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fmul") - (set_attr "type" "neon_fp_mul_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mul_<Vetype><q>")] ) (define_insn "div<mode>3" @@ -1503,9 +1166,7 @@ (match_operand:VDQF 2 "register_operand" "w")))] "TARGET_SIMD" "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fdiv") - (set_attr "type" "neon_fp_div_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_div_<Vetype><q>")] ) (define_insn "neg<mode>2" @@ -1513,9 +1174,7 @@ (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))] "TARGET_SIMD" "fneg\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_fnegabs") - (set_attr "type" "neon_fp_neg_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_neg_<Vetype><q>")] ) (define_insn "abs<mode>2" @@ -1523,9 +1182,7 @@ (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))] "TARGET_SIMD" "fabs\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_fnegabs") - (set_attr "type" "neon_fp_abs_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_abs_<Vetype><q>")] ) (define_insn "fma<mode>4" @@ -1535,9 +1192,7 @@ (match_operand:VDQF 3 "register_operand" "0")))] "TARGET_SIMD" "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fmla") - (set_attr "type" "neon_fp_mla_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mla_<Vetype><q>")] ) (define_insn "*aarch64_fma4_elt<mode>" @@ -1551,9 +1206,7 @@ (match_operand:VDQF 4 "register_operand" "0")))] "TARGET_SIMD" "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>") - 
(set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>" @@ -1567,9 +1220,7 @@ (match_operand:VDQSF 4 "register_operand" "0")))] "TARGET_SIMD" "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_fma4_elt_to_128df" @@ -1581,9 +1232,7 @@ (match_operand:V2DF 3 "register_operand" "0")))] "TARGET_SIMD" "fmla\\t%0.2d, %2.2d, %1.2d[0]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_d_scalar_q") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_mla_d_scalar_q")] ) (define_insn "*aarch64_fma4_elt_to_64v2df" @@ -1596,9 +1245,7 @@ (match_operand:DF 4 "register_operand" "0")))] "TARGET_SIMD" "fmla\\t%0.2d, %3.2d, %1.2d[%2]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_d_scalar_q") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_mla_d_scalar_q")] ) (define_insn "fnma<mode>4" @@ -1610,9 +1257,7 @@ (match_operand:VDQF 3 "register_operand" "0")))] "TARGET_SIMD" "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fmla") - (set_attr "type" "neon_fp_mla_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mla_<Vetype><q>")] ) (define_insn "*aarch64_fnma4_elt<mode>" @@ -1627,9 +1272,7 @@ (match_operand:VDQF 4 "register_operand" "0")))] "TARGET_SIMD" "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>" @@ -1644,9 +1287,7 @@ (match_operand:VDQSF 4 "register_operand" "0")))] "TARGET_SIMD" "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] ) (define_insn "*aarch64_fnma4_elt_to_128df" @@ -1659,9 +1300,7 @@ (match_operand:V2DF 3 "register_operand" "0")))] "TARGET_SIMD" "fmls\\t%0.2d, %2.2d, %1.2d[0]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_d_scalar_q") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_mla_d_scalar_q")] ) (define_insn "*aarch64_fnma4_elt_to_64v2df" @@ -1675,9 +1314,7 @@ (match_operand:DF 4 "register_operand" "0")))] "TARGET_SIMD" "fmls\\t%0.2d, %3.2d, %1.2d[%2]" - [(set_attr "simd_type" "simd_fmla_elt") - (set_attr "type" "neon_fp_mla_d_scalar_q") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_mla_d_scalar_q")] ) ;; Vector versions of the floating-point frint patterns. @@ -1688,9 +1325,7 @@ FRINT))] "TARGET_SIMD" "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_frint") - (set_attr "type" "neon_fp_round_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_round_<Vetype><q>")] ) ;; Vector versions of the fcvt standard patterns. 
@@ -1702,9 +1337,7 @@ FCVT)))] "TARGET_SIMD" "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_fcvti") - (set_attr "type" "neon_fp_to_int_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_to_int_<Vetype><q>")] ) (define_expand "<optab><VDQF:mode><fcvt_target>2" @@ -1736,9 +1369,7 @@ (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))] "TARGET_SIMD" "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_icvtf") - (set_attr "type" "neon_int_to_fp_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_int_to_fp_<Vetype><q>")] ) ;; Conversions between vectors of floats and doubles. @@ -1756,9 +1387,7 @@ )))] "TARGET_SIMD" "fcvtl\\t%0.2d, %1.2s" - [(set_attr "simd_type" "simd_fcvtl") - (set_attr "type" "neon_fp_cvt_widen_s") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_cvt_widen_s")] ) (define_insn "aarch64_float_extend_lo_v2df" @@ -1767,9 +1396,7 @@ (match_operand:V2SF 1 "register_operand" "w")))] "TARGET_SIMD" "fcvtl\\t%0.2d, %1.2s" - [(set_attr "simd_type" "simd_fcvtl") - (set_attr "type" "neon_fp_cvt_widen_s") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_cvt_widen_s")] ) (define_insn "vec_unpacks_hi_v4sf" @@ -1781,9 +1408,7 @@ )))] "TARGET_SIMD" "fcvtl2\\t%0.2d, %1.4s" - [(set_attr "simd_type" "simd_fcvtl") - (set_attr "type" "neon_fp_cvt_widen_s") - (set_attr "simd_mode" "V2DF")] + [(set_attr "type" "neon_fp_cvt_widen_s")] ) ;; Float narrowing operations. @@ -1794,9 +1419,7 @@ (match_operand:V2DF 1 "register_operand" "w")))] "TARGET_SIMD" "fcvtn\\t%0.2s, %1.2d" - [(set_attr "simd_type" "simd_fcvtl") - (set_attr "type" "neon_fp_cvt_narrow_d_q") - (set_attr "simd_mode" "V2SF")] + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] ) (define_insn "aarch64_float_truncate_hi_v4sf" @@ -1807,9 +1430,7 @@ (match_operand:V2DF 2 "register_operand" "w"))))] "TARGET_SIMD" "fcvtn2\\t%0.4s, %2.2d" - [(set_attr "simd_type" "simd_fcvtl") - (set_attr "type" "neon_fp_cvt_narrow_d_q") - (set_attr "simd_mode" "V4SF")] + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] ) (define_expand "vec_pack_trunc_v2df" @@ -1855,9 +1476,7 @@ (match_operand:VDQF 3 "register_operand" "w"))))] "TARGET_SIMD" "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" - [(set_attr "simd_type" "simd_fmla") - (set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] ) ;; FP Max/Min @@ -1880,9 +1499,7 @@ (match_operand:VDQF 2 "register_operand" "w")))] "TARGET_SIMD" "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fminmax") - (set_attr "type" "neon_fp_minmax_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_minmax_<Vetype><q>")] ) (define_insn "<maxmin_uns><mode>3" @@ -1892,9 +1509,7 @@ FMAXMIN_UNS))] "TARGET_SIMD" "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_fminmax") - (set_attr "type" "neon_fp_minmax_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_minmax_<Vetype><q>")] ) ;; 'across lanes' add. 
@@ -1905,9 +1520,7 @@ SUADDV))] "TARGET_SIMD" "addv\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "simd_type" "simd_addv") - (set_attr "type" "neon_reduc_add<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_reduc_add<q>")] ) (define_insn "reduc_<sur>plus_v2di" @@ -1916,9 +1529,7 @@ SUADDV))] "TARGET_SIMD" "addp\\t%d0, %1.2d" - [(set_attr "simd_type" "simd_addv") - (set_attr "type" "neon_reduc_add_q") - (set_attr "simd_mode" "V2DI")] + [(set_attr "type" "neon_reduc_add_q")] ) (define_insn "reduc_<sur>plus_v2si" @@ -1927,9 +1538,7 @@ SUADDV))] "TARGET_SIMD" "addp\\t%0.2s, %1.2s, %1.2s" - [(set_attr "simd_type" "simd_addv") - (set_attr "type" "neon_reduc_add") - (set_attr "simd_mode" "V2SI")] + [(set_attr "type" "neon_reduc_add")] ) (define_insn "reduc_<sur>plus_<mode>" @@ -1938,9 +1547,7 @@ SUADDV))] "TARGET_SIMD" "faddp\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "simd_type" "simd_fadd") - (set_attr "type" "neon_fp_reduc_add_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")] ) (define_insn "aarch64_addpv4sf" @@ -1949,9 +1556,7 @@ UNSPEC_FADDV))] "TARGET_SIMD" "faddp\\t%0.4s, %1.4s, %1.4s" - [(set_attr "simd_type" "simd_fadd") - (set_attr "type" "neon_fp_reduc_add_s_q") - (set_attr "simd_mode" "V4SF")] + [(set_attr "type" "neon_fp_reduc_add_s_q")] ) (define_expand "reduc_<sur>plus_v4sf" @@ -1971,9 +1576,7 @@ (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] "TARGET_SIMD" "clz\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_cls") - (set_attr "type" "neon_cls<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_cls<q>")] ) ;; 'across lanes' max and min ops. @@ -1984,9 +1587,7 @@ MAXMINV))] "TARGET_SIMD" "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "simd_type" "simd_minmaxv") - (set_attr "type" "neon_reduc_minmax<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_reduc_minmax<q>")] ) (define_insn "reduc_<maxmin_uns>_v2di" @@ -1995,9 +1596,7 @@ MAXMINV))] "TARGET_SIMD" "<maxmin_uns_op>p\\t%d0, %1.2d" - [(set_attr "simd_type" "simd_minmaxv") - (set_attr "type" "neon_reduc_minmax_q") - (set_attr "simd_mode" "V2DI")] + [(set_attr "type" "neon_reduc_minmax_q")] ) (define_insn "reduc_<maxmin_uns>_v2si" @@ -2006,9 +1605,7 @@ MAXMINV))] "TARGET_SIMD" "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s" - [(set_attr "simd_type" "simd_minmaxv") - (set_attr "type" "neon_reduc_minmax") - (set_attr "simd_mode" "V2SI")] + [(set_attr "type" "neon_reduc_minmax")] ) (define_insn "reduc_<maxmin_uns>_<mode>" @@ -2017,9 +1614,7 @@ FMAXMINV))] "TARGET_SIMD" "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "simd_type" "simd_fminmaxv") - (set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] ) (define_insn "reduc_<maxmin_uns>_v4sf" @@ -2028,9 +1623,7 @@ FMAXMINV))] "TARGET_SIMD" "<maxmin_uns_op>v\\t%s0, %1.4s" - [(set_attr "simd_type" "simd_fminmaxv") - (set_attr "type" "neon_fp_reduc_minmax_s_q") - (set_attr "simd_mode" "V4SF")] + [(set_attr "type" "neon_fp_reduc_minmax_s_q")] ) ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register @@ -2065,8 +1658,7 @@ bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>" - [(set_attr "simd_mode" "<MODE>") - (set_attr "type" "neon_bsl<q>")] + [(set_attr "type" "neon_bsl<q>")] ) (define_expand "aarch64_simd_bsl<mode>" @@ -2431,9 +2023,7 @@ (parallel [(match_operand:SI 2 
"immediate_operand" "i")]))))] "TARGET_SIMD" "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]" - [(set_attr "simd_type" "simd_movgp") - (set_attr "type" "neon_to_gp<q>") - (set_attr "simd_mode" "<VDQQH:MODE>")] + [(set_attr "type" "neon_to_gp<q>")] ) (define_insn "*aarch64_get_lane_zero_extendsi<mode>" @@ -2444,9 +2034,7 @@ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" "umov\\t%w0, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_movgp") - (set_attr "type" "neon_to_gp<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_to_gp<q>")] ) ;; Lane extraction of a value, neither sign nor zero extension @@ -2460,9 +2048,7 @@ "@ umov\\t%<vwcore>0, %1.<Vetype>[%2] dup\\t%<Vetype>0, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_movgp, simd_dup") - (set_attr "type" "neon_to_gp<q>, neon_dup<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_to_gp<q>, neon_dup<q>")] ) (define_expand "aarch64_get_lanedi" @@ -2585,9 +2171,7 @@ (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] "TARGET_SIMD" "mov\\t%0.8b, %1.8b" - [(set_attr "simd_type" "simd_move") - (set_attr "type" "neon_move<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_move<q>")] ) (define_insn_and_split "aarch64_combine<mode>" @@ -2630,9 +2214,7 @@ (match_dup 3)))))] "TARGET_SIMD" "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_addl") - (set_attr "type" "neon_<ADDSUB:optab>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<ADDSUB:optab>_long")] ) (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal" @@ -2645,9 +2227,7 @@ (match_dup 3)))))] "TARGET_SIMD" "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" - [(set_attr "simd_type" "simd_addl") - (set_attr "type" "neon_<ADDSUB:optab>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<ADDSUB:optab>_long")] ) @@ -2707,9 +2287,7 @@ (match_operand:VDW 2 "register_operand" "w"))))] "TARGET_SIMD" "<ANY_EXTEND:su><ADDSUB:optab>l %0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_addl") - (set_attr "type" "neon_<ADDSUB:optab>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<ADDSUB:optab>_long")] ) ;; <su><addsub>w<q>. @@ -2721,9 +2299,7 @@ (match_operand:VDW 2 "register_operand" "w"))))] "TARGET_SIMD" "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_addl") - (set_attr "type" "neon_<ADDSUB:optab>_widen") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<ADDSUB:optab>_widen")] ) (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal" @@ -2735,9 +2311,7 @@ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] "TARGET_SIMD" "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_addl") - (set_attr "type" "neon_<ADDSUB:optab>_widen") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<ADDSUB:optab>_widen")] ) (define_expand "aarch64_saddw2<mode>" @@ -2798,9 +2372,7 @@ HADDSUB))] "TARGET_SIMD" "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_add") - (set_attr "type" "neon_<addsub>_halve<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<addsub>_halve<q>")] ) ;; <r><addsub>hn<q>. 
@@ -2812,9 +2384,7 @@ ADDSUBHN))] "TARGET_SIMD" "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_addn") - (set_attr "type" "neon_<addsub>_halve_narrow_q") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<addsub>_halve_narrow_q")] ) (define_insn "aarch64_<sur><addsub>hn2<mode>" @@ -2825,9 +2395,7 @@ ADDSUBHN2))] "TARGET_SIMD" "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>" - [(set_attr "simd_type" "simd_addn2") - (set_attr "type" "neon_<addsub>_halve_narrow_q") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<addsub>_halve_narrow_q")] ) ;; pmul. @@ -2839,9 +2407,7 @@ UNSPEC_PMUL))] "TARGET_SIMD" "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_mul") - (set_attr "type" "neon_mul_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_mul_<Vetype><q>")] ) ;; <su>q<addsub> @@ -2852,9 +2418,7 @@ (match_operand:VSDQ_I 2 "register_operand" "w")))] "TARGET_SIMD" "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_add") - (set_attr "type" "neon_<optab><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<optab><q>")] ) ;; suqadd and usqadd @@ -2866,9 +2430,7 @@ USSUQADD))] "TARGET_SIMD" "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_sat_add") - (set_attr "type" "neon_qadd<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_qadd<q>")] ) ;; sqmovun @@ -2879,9 +2441,7 @@ UNSPEC_SQXTUN))] "TARGET_SIMD" "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" - [(set_attr "simd_type" "simd_sat_shiftn_imm") - (set_attr "type" "neon_sat_shift_imm_narrow_q") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] ) ;; sqmovn and uqmovn @@ -2892,9 +2452,7 @@ SUQMOVN))] "TARGET_SIMD" "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" - [(set_attr "simd_type" "simd_sat_shiftn_imm") - (set_attr "type" "neon_sat_shift_imm_narrow_q") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] ) ;; <su>q<absneg> @@ -2905,9 +2463,7 @@ (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] "TARGET_SIMD" "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>" - [(set_attr "simd_type" "simd_sat_negabs") - (set_attr "type" "neon_<optab><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_<optab><q>")] ) ;; sq<r>dmulh. 
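The sq<r>dmulh_lane patterns that follow use an output template written as C code (the leading "*") instead of a literal string: the fragment range-checks the lane immediate with aarch64_simd_lane_bounds before returning the assembly. A deliberately hypothetical minimal pattern showing the same convention (this exact insn is not in the file; only the mechanism is):

;; Hypothetical example of a "*"-style C output template.
(define_insn "*example_dup_lane_checked"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(vec_duplicate:V4SI
	  (vec_select:SI
	    (match_operand:V4SI 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  "*
   /* Reject lane numbers outside [0, 4) at output time.  */
   aarch64_simd_lane_bounds (operands[2], 0, GET_MODE_NUNITS (V4SImode));
   return \"dup\\t%0.4s, %1.s[%2]\";"
  [(set_attr "type" "neon_dup_q")]
)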
@@ -2920,9 +2476,7 @@ VQDMULH))] "TARGET_SIMD" "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype><q>")] ) ;; sq<r>dmulh_lane @@ -2939,9 +2493,7 @@ "* aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) (define_insn "aarch64_sq<r>dmulh_laneq<mode>" @@ -2956,9 +2508,7 @@ "* aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) (define_insn "aarch64_sq<r>dmulh_lane<mode>" @@ -2973,9 +2523,7 @@ "* aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) ;; vqdml[sa]l @@ -2993,9 +2541,7 @@ (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" - [(set_attr "simd_type" "simd_sat_mlal") - (set_attr "type" "neon_sat_mla_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mla_<Vetype>_long")] ) ;; vqdml[sa]l_lane @@ -3017,9 +2563,7 @@ (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]" - [(set_attr "simd_type" "simd_sat_mlal") - (set_attr "type" "neon_sat_mla_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal" @@ -3038,9 +2582,7 @@ (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]" - [(set_attr "simd_type" "simd_sat_mlal") - (set_attr "type" "neon_sat_mla_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) (define_expand "aarch64_sqdmlal_lane<mode>" @@ -3119,9 +2661,7 @@ (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" - [(set_attr "simd_type" "simd_sat_mlal") - (set_attr "type" "neon_sat_mla_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) ;; sqdml[as]l2 @@ -3143,9 +2683,7 @@ (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" - [(set_attr "simd_type" "simd_sat_mlal") - (set_attr "type" "neon_sat_mla_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) (define_expand "aarch64_sqdmlal2<mode>" @@ -3195,9 +2733,7 @@ (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]" - [(set_attr "simd_type" "simd_sat_mlal") - (set_attr "type" "neon_sat_mla_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) (define_expand 
"aarch64_sqdmlal2_lane<mode>" @@ -3280,9 +2816,7 @@ (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" - [(set_attr "simd_type" "simd_sat_mlal") - (set_attr "type" "neon_sat_mla_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) (define_expand "aarch64_sqdmlal2_n<mode>" @@ -3326,9 +2860,7 @@ (const_int 1)))] "TARGET_SIMD" "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_long")] ) ;; vqdmull_lane @@ -3348,9 +2880,7 @@ (const_int 1)))] "TARGET_SIMD" "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] ) (define_insn "aarch64_sqdmull_lane<mode>_internal" @@ -3367,9 +2897,7 @@ (const_int 1)))] "TARGET_SIMD" "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] ) (define_expand "aarch64_sqdmull_lane<mode>" @@ -3413,9 +2941,7 @@ (const_int 1)))] "TARGET_SIMD" "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] ) ;; vqdmull2 @@ -3438,9 +2964,7 @@ (const_int 1)))] "TARGET_SIMD" "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] ) (define_expand "aarch64_sqdmull2<mode>" @@ -3474,9 +2998,7 @@ (const_int 1)))] "TARGET_SIMD" "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] ) (define_expand "aarch64_sqdmull2_lane<mode>" @@ -3526,9 +3048,7 @@ (const_int 1)))] "TARGET_SIMD" "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" - [(set_attr "simd_type" "simd_sat_mul") - (set_attr "type" "neon_sat_mul_<Vetype>_scalar_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] ) (define_expand "aarch64_sqdmull2_n<mode>" @@ -3553,9 +3073,7 @@ VSHL))] "TARGET_SIMD" "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"; - [(set_attr "simd_type" "simd_shift") - (set_attr "type" "neon_shift_reg<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_reg<q>")] ) @@ -3569,9 +3087,7 @@ VQSHL))] "TARGET_SIMD" "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"; - [(set_attr "simd_type" "simd_sat_shift") - (set_attr "type" "neon_sat_shift_reg<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_shift_reg<q>")] ) ;; vshll_n @@ -3592,9 +3108,7 @@ else { return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\"; }" - [(set_attr "simd_type" "simd_shift_imm") - (set_attr "type" "neon_shift_imm_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" 
"neon_shift_imm_long")] ) ;; vshll_high_n @@ -3615,9 +3129,7 @@ else { return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\"; }" - [(set_attr "simd_type" "simd_shift_imm") - (set_attr "type" "neon_shift_imm_long") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm_long")] ) ;; vrshr_n @@ -3632,9 +3144,7 @@ int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; aarch64_simd_const_bounds (operands[2], 1, bit_width + 1); return \"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";" - [(set_attr "simd_type" "simd_shift_imm") - (set_attr "type" "neon_sat_shift_imm<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_shift_imm<q>")] ) ;; v(r)sra_n @@ -3650,9 +3160,7 @@ int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; aarch64_simd_const_bounds (operands[3], 1, bit_width + 1); return \"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";" - [(set_attr "simd_type" "simd_shift_imm_acc") - (set_attr "type" "neon_shift_acc<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_acc<q>")] ) ;; vs<lr>i_n @@ -3669,9 +3177,7 @@ aarch64_simd_const_bounds (operands[3], 1 - <VSLRI:offsetlr>, bit_width - <VSLRI:offsetlr> + 1); return \"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";" - [(set_attr "simd_type" "simd_shift_imm") - (set_attr "type" "neon_shift_imm<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_shift_imm<q>")] ) ;; vqshl(u) @@ -3686,9 +3192,7 @@ int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; aarch64_simd_const_bounds (operands[2], 0, bit_width); return \"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";" - [(set_attr "simd_type" "simd_sat_shift_imm") - (set_attr "type" "neon_sat_shift_imm<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_shift_imm<q>")] ) @@ -3704,9 +3208,7 @@ int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; aarch64_simd_const_bounds (operands[2], 1, bit_width + 1); return \"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2\";" - [(set_attr "simd_type" "simd_sat_shiftn_imm") - (set_attr "type" "neon_sat_shift_imm_narrow_q") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] ) @@ -3725,9 +3227,7 @@ "@ cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0" - [(set_attr "simd_type" "simd_cmp") - (set_attr "type" "neon_compare<q>, neon_compare_zero<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")] ) (define_insn_and_split "aarch64_cm<optab>di" @@ -3756,9 +3256,7 @@ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); DONE; } - [(set_attr "simd_type" "simd_cmp") - (set_attr "type" "neon_compare, neon_compare_zero, multiple") - (set_attr "simd_mode" "DI")] + [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] ) ;; cm(hs|hi) @@ -3772,9 +3270,7 @@ )))] "TARGET_SIMD" "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" - [(set_attr "simd_type" "simd_cmp") - (set_attr "type" "neon_compare<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_compare<q>")] ) (define_insn_and_split "aarch64_cm<optab>di" @@ -3802,9 +3298,7 @@ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); DONE; } - [(set_attr "simd_type" "simd_cmp") - (set_attr "type" "neon_compare, neon_compare_zero") - (set_attr "simd_mode" "DI")] + [(set_attr "type" "neon_compare, neon_compare_zero")] ) ;; cmtst @@ -3819,9 +3313,7 @@ (vec_duplicate:<V_cmp_result> (const_int 0)))))] 
"TARGET_SIMD" "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_cmp") - (set_attr "type" "neon_tst<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_tst<q>")] ) (define_insn_and_split "aarch64_cmtstdi" @@ -3851,9 +3343,7 @@ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); DONE; } - [(set_attr "simd_type" "simd_cmp") - (set_attr "type" "neon_tst") - (set_attr "simd_mode" "DI")] + [(set_attr "type" "neon_tst")] ) ;; fcm(eq|ge|gt|le|lt) @@ -3869,9 +3359,7 @@ "@ fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" - [(set_attr "simd_type" "simd_fcmp") - (set_attr "type" "neon_fp_compare_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_compare_<Vetype><q>")] ) ;; fac(ge|gt) @@ -3887,9 +3375,7 @@ )))] "TARGET_SIMD" "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" - [(set_attr "simd_type" "simd_fcmp") - (set_attr "type" "neon_fp_compare_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_compare_<Vetype><q>")] ) ;; addp @@ -3902,9 +3388,7 @@ UNSPEC_ADDP))] "TARGET_SIMD" "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_add") - (set_attr "type" "neon_reduc_add<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_reduc_add<q>")] ) (define_insn "aarch64_addpdi" @@ -3914,9 +3398,7 @@ UNSPEC_ADDP))] "TARGET_SIMD" "addp\t%d0, %1.2d" - [(set_attr "simd_type" "simd_add") - (set_attr "type" "neon_reduc_add") - (set_attr "simd_mode" "DI")] + [(set_attr "type" "neon_reduc_add")] ) ;; sqrt @@ -3926,9 +3408,7 @@ (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] "TARGET_SIMD" "fsqrt\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_fsqrt") - (set_attr "type" "neon_fp_sqrt_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")] ) ;; Patterns for vector struct loads and stores. 
@@ -3940,9 +3420,8 @@ UNSPEC_LD2))] "TARGET_SIMD" "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" - [(set_attr "simd_type" "simd_load2") - (set_attr "type" "neon_load2_2reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load2_2reg<q>")] +) (define_insn "vec_store_lanesoi<mode>" [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") @@ -3951,9 +3430,8 @@ UNSPEC_ST2))] "TARGET_SIMD" "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" - [(set_attr "simd_type" "simd_store2") - (set_attr "type" "neon_store2_2reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store2_2reg<q>")] +) (define_insn "vec_load_lanesci<mode>" [(set (match_operand:CI 0 "register_operand" "=w") @@ -3962,9 +3440,8 @@ UNSPEC_LD3))] "TARGET_SIMD" "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" - [(set_attr "simd_type" "simd_load3") - (set_attr "type" "neon_load3_3reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load3_3reg<q>")] +) (define_insn "vec_store_lanesci<mode>" [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") @@ -3973,9 +3450,8 @@ UNSPEC_ST3))] "TARGET_SIMD" "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" - [(set_attr "simd_type" "simd_store3") - (set_attr "type" "neon_store3_3reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store3_3reg<q>")] +) (define_insn "vec_load_lanesxi<mode>" [(set (match_operand:XI 0 "register_operand" "=w") @@ -3984,9 +3460,8 @@ UNSPEC_LD4))] "TARGET_SIMD" "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" - [(set_attr "simd_type" "simd_load4") - (set_attr "type" "neon_load4_4reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load4_4reg<q>")] +) (define_insn "vec_store_lanesxi<mode>" [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") @@ -3995,9 +3470,8 @@ UNSPEC_ST4))] "TARGET_SIMD" "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" - [(set_attr "simd_type" "simd_store4") - (set_attr "type" "neon_store4_4reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store4_4reg<q>")] +) ;; Reload patterns for AdvSIMD register list operands. 
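The register-list mov pattern in the next hunk is also worth a glance for its length attribute: rather than a constant, the length is computed per insn through a symbol_ref calling aarch64_simd_attr_length_move, because a structure move may expand to a single move or to multi-register loads/stores. The attribute block, reassembled from the + lines:

  [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   ;; Length depends on which alternative is chosen, so it is
   ;; computed by a C helper instead of being a constant.
   (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
)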
@@ -4029,11 +3503,10 @@ default: gcc_unreachable (); } } - [(set_attr "simd_type" "simd_move,simd_store<nregs>,simd_load<nregs>") - (set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\ + [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\ neon_load<nregs>_<nregs>reg_q") - (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)")) - (set_attr "simd_mode" "<MODE>")]) + (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] +) (define_split [(set (match_operand:OI 0 "register_operand" "") @@ -4115,9 +3588,8 @@ (vec_duplicate:VD (const_int 0)))) 0))] "TARGET_SIMD" "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" - [(set_attr "simd_type" "simd_load2") - (set_attr "type" "neon_load2_2reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load2_2reg<q>")] +) (define_insn "aarch64_ld2<mode>_dreg" [(set (match_operand:OI 0 "register_operand" "=w") @@ -4133,9 +3605,8 @@ (const_int 0))) 0))] "TARGET_SIMD" "ld1\\t{%S0.1d - %T0.1d}, %1" - [(set_attr "simd_type" "simd_load2") - (set_attr "type" "neon_load1_2reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load1_2reg<q>")] +) (define_insn "aarch64_ld3<mode>_dreg" [(set (match_operand:CI 0 "register_operand" "=w") @@ -4156,9 +3627,8 @@ (vec_duplicate:VD (const_int 0)))) 0))] "TARGET_SIMD" "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" - [(set_attr "simd_type" "simd_load3") - (set_attr "type" "neon_load3_3reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load3_3reg<q>")] +) (define_insn "aarch64_ld3<mode>_dreg" [(set (match_operand:CI 0 "register_operand" "=w") @@ -4179,9 +3649,8 @@ (const_int 0))) 0))] "TARGET_SIMD" "ld1\\t{%S0.1d - %U0.1d}, %1" - [(set_attr "simd_type" "simd_load3") - (set_attr "type" "neon_load1_3reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load1_3reg<q>")] +) (define_insn "aarch64_ld4<mode>_dreg" [(set (match_operand:XI 0 "register_operand" "=w") @@ -4207,9 +3676,8 @@ (vec_duplicate:VD (const_int 0))))) 0))] "TARGET_SIMD" "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" - [(set_attr "simd_type" "simd_load4") - (set_attr "type" "neon_load4_4reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load4_4reg<q>")] +) (define_insn "aarch64_ld4<mode>_dreg" [(set (match_operand:XI 0 "register_operand" "=w") @@ -4235,9 +3703,8 @@ (const_int 0)))) 0))] "TARGET_SIMD" "ld1\\t{%S0.1d - %V0.1d}, %1" - [(set_attr "simd_type" "simd_load4") - (set_attr "type" "neon_load1_4reg<q>") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load1_4reg<q>")] +) (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>" [(match_operand:VSTRUCT 0 "register_operand" "=w") @@ -4351,9 +3818,7 @@ UNSPEC_TBL))] "TARGET_SIMD" "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>" - [(set_attr "simd_type" "simd_tbl") - (set_attr "type" "neon_tbl1<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_tbl1<q>")] ) ;; Two source registers. 
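Besides the attribute rework, the tail of this file's diff (just before the aarch64-tune.md hunk further below) adds one genuinely new standard pattern, vec_extract<mode>, giving the middle end three ways to pull a lane out of a vector. Reassembled from the + lines, with comments:

(define_insn "vec_extract<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i,i,i")])))]
  "TARGET_SIMD"
  ;; Lane to general register (umov), lane to SIMD scalar (dup),
  ;; or lane straight to memory (st1), picked by the destination.
  "@
   umov\\t%<vw>0, %1.<Vetype>[%2]
   dup\\t%<Vetype>0, %1.<Vetype>[%2]
   st1\\t{%1.<Vetype>}[%2], %0"
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)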
@@ -4365,9 +3830,7 @@ UNSPEC_TBL))] "TARGET_SIMD" "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" - [(set_attr "simd_type" "simd_tbl") - (set_attr "type" "neon_tbl2_q") - (set_attr "simd_mode" "V16QI")] + [(set_attr "type" "neon_tbl2_q")] ) (define_insn_and_split "aarch64_combinev16qi" @@ -4393,9 +3856,7 @@ PERMUTE))] "TARGET_SIMD" "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "simd_type" "simd_<PERMUTE:perm_insn>") - (set_attr "type" "neon_permute<q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_permute<q>")] ) (define_insn "aarch64_st2<mode>_dreg" @@ -4405,9 +3866,8 @@ UNSPEC_ST2))] "TARGET_SIMD" "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" - [(set_attr "simd_type" "simd_store2") - (set_attr "type" "neon_store2_2reg") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store2_2reg")] +) (define_insn "aarch64_st2<mode>_dreg" [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") @@ -4416,9 +3876,8 @@ UNSPEC_ST2))] "TARGET_SIMD" "st1\\t{%S1.1d - %T1.1d}, %0" - [(set_attr "simd_type" "simd_store2") - (set_attr "type" "neon_store1_2reg") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store1_2reg")] +) (define_insn "aarch64_st3<mode>_dreg" [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") @@ -4427,9 +3886,8 @@ UNSPEC_ST3))] "TARGET_SIMD" "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" - [(set_attr "simd_type" "simd_store3") - (set_attr "type" "neon_store3_3reg") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store3_3reg")] +) (define_insn "aarch64_st3<mode>_dreg" [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") @@ -4438,9 +3896,8 @@ UNSPEC_ST3))] "TARGET_SIMD" "st1\\t{%S1.1d - %U1.1d}, %0" - [(set_attr "simd_type" "simd_store3") - (set_attr "type" "neon_store1_3reg") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store1_3reg")] +) (define_insn "aarch64_st4<mode>_dreg" [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") @@ -4449,9 +3906,8 @@ UNSPEC_ST4))] "TARGET_SIMD" "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" - [(set_attr "simd_type" "simd_store4") - (set_attr "type" "neon_store4_4reg") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store4_4reg")] +) (define_insn "aarch64_st4<mode>_dreg" [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") @@ -4460,9 +3916,8 @@ UNSPEC_ST4))] "TARGET_SIMD" "st1\\t{%S1.1d - %V1.1d}, %0" - [(set_attr "simd_type" "simd_store4") - (set_attr "type" "neon_store1_4reg") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_store1_4reg")] +) (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>" [(match_operand:DI 0 "register_operand" "r") @@ -4540,9 +3995,8 @@ (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] "TARGET_SIMD" "ld1r\\t{%0.<Vtype>}, %1" - [(set_attr "simd_type" "simd_load1r") - (set_attr "type" "neon_load1_all_lanes") - (set_attr "simd_mode" "<MODE>")]) + [(set_attr "type" "neon_load1_all_lanes")] +) (define_insn "aarch64_frecpe<mode>" [(set (match_operand:VDQF 0 "register_operand" "=w") @@ -4550,9 +4004,7 @@ UNSPEC_FRECPE))] "TARGET_SIMD" "frecpe\\t%0.<Vtype>, %1.<Vtype>" - [(set_attr "simd_type" "simd_frecpe") - (set_attr "type" "neon_fp_recpe_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_recpe_<Vetype><q>")] ) (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" @@ -4561,9 +4013,7 @@ FRECP))] "TARGET_SIMD" "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" - [(set_attr "simd_type" "simd_frecp<FRECP:frecp_suffix>") - (set_attr "type" 
"neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")] ) (define_insn "aarch64_frecps<mode>" @@ -4573,8 +4023,21 @@ UNSPEC_FRECPS))] "TARGET_SIMD" "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "simd_type" "simd_frecps") - (set_attr "type" "neon_fp_recps_<Vetype><q>") - (set_attr "simd_mode" "<MODE>")] + [(set_attr "type" "neon_fp_recps_<Vetype><q>")] +) + +;; Standard pattern name vec_extract<mode>. + +(define_insn "vec_extract<mode>" + [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") + (vec_select:<VEL> + (match_operand:VALL 1 "register_operand" "w, w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i,i")])))] + "TARGET_SIMD" + "@ + umov\\t%<vw>0, %1.<Vetype>[%2] + dup\\t%<Vetype>0, %1.<Vetype>[%2] + st1\\t{%1.<Vetype>}[%2], %0" + [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] ) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 02699e35c3f..84081d1ba57 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa53,cortexa57,large,small" + "cortexa53,cortexa15" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 8458cac34f2..aad9a298761 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -26,6 +26,10 @@ #include "rtl.h" #include "insn-attr.h" #include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" #include "regs.h" #include "df.h" #include "hard-reg-set.h" @@ -43,10 +47,12 @@ #include "langhooks.h" #include "diagnostic-core.h" #include "gimple.h" +#include "gimplify.h" #include "optabs.h" #include "dwarf2.h" #include "cfgloop.h" #include "tree-vectorizer.h" +#include "config/arm/aarch-cost-tables.h" /* Defined for convenience. */ #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) @@ -127,7 +133,7 @@ static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, const unsigned char *sel); /* The processor for which instructions should be scheduled. */ -enum aarch64_processor aarch64_tune = generic; +enum aarch64_processor aarch64_tune = cortexa53; /* The current tuning set. 
*/ const struct tune_params *aarch64_tune_params; @@ -149,21 +155,6 @@ unsigned long aarch64_tune_flags = 0; #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ #endif -static const struct cpu_rtx_cost_table generic_rtx_cost_table = -{ - NAMED_PARAM (memory_load, COSTS_N_INSNS (1)), - NAMED_PARAM (memory_store, COSTS_N_INSNS (0)), - NAMED_PARAM (register_shift, COSTS_N_INSNS (1)), - NAMED_PARAM (int_divide, COSTS_N_INSNS (6)), - NAMED_PARAM (float_divide, COSTS_N_INSNS (2)), - NAMED_PARAM (double_divide, COSTS_N_INSNS (6)), - NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)), - NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)), - NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)), - NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)), - NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)), - NAMED_PARAM (double_multiply, COSTS_N_INSNS (1)) -}; #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ @@ -216,7 +207,7 @@ __extension__ #endif static const struct tune_params generic_tunings = { - &generic_rtx_cost_table, + &generic_extra_costs, &generic_addrcost_table, &generic_regmove_cost, &generic_vector_cost, @@ -240,7 +231,7 @@ static const struct processor all_cores[] = {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, #include "aarch64-cores.def" #undef AARCH64_CORE - {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, + {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, {NULL, aarch64_none, NULL, 0, NULL} }; @@ -251,7 +242,6 @@ static const struct processor all_architectures[] = {NAME, CORE, #ARCH, FLAGS, NULL}, #include "aarch64-arches.def" #undef AARCH64_ARCH - {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL}, {NULL, aarch64_none, NULL, 0, NULL} }; @@ -1810,7 +1800,8 @@ aarch64_save_or_restore_fprs (int start_offset, int increment, unsigned regno; unsigned regno2; rtx insn; - rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + rtx (*gen_mem_ref)(enum machine_mode, rtx) + = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) @@ -1853,16 +1844,17 @@ aarch64_save_or_restore_fprs (int start_offset, int increment, ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, gen_rtx_REG (DFmode, regno2), mem2)); - add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno)); - add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2)); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DFmode, regno)); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DFmode, regno2)); } /* The first part of a frame-related parallel insn is always assumed to be relevant to the frame calculations; subsequent parts, are only frame-related if explicitly marked. 
*/ - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, - 1)) = 1; + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; regno = regno2; start_offset += increment * 2; } @@ -1873,7 +1865,8 @@ aarch64_save_or_restore_fprs (int start_offset, int increment, else { insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); - add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DImode, regno)); } start_offset += increment; } @@ -2010,7 +2003,7 @@ aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, Establish the stack frame by decreasing the stack pointer with a properly calculated size and, if necessary, create a frame record filled with the values of LR and previous frame pointer. The - current FP is also set up is it is in use. */ + current FP is also set up if it is in use. */ void aarch64_expand_prologue (void) @@ -4143,7 +4136,7 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) return offset - crtl->outgoing_args_size; if (from == FRAME_POINTER_REGNUM) - return cfun->machine->frame.saved_regs_size; + return cfun->machine->frame.saved_regs_size + get_frame_size (); } if (to == STACK_POINTER_REGNUM) @@ -4152,6 +4145,7 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) { HOST_WIDE_INT elim = crtl->outgoing_args_size + cfun->machine->frame.saved_regs_size + + get_frame_size () - cfun->machine->frame.fp_lr_offset; elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); return elim; @@ -4490,7 +4484,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, int param ATTRIBUTE_UNUSED, int *cost, bool speed) { rtx op0, op1; - const struct cpu_rtx_cost_table *extra_cost + const struct cpu_cost_table *extra_cost = aarch64_tune_params->insn_extra_cost; switch (code) @@ -4503,7 +4497,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, { case MEM: if (speed) - *cost += extra_cost->memory_store; + *cost += extra_cost->ldst.store; if (op1 != const0_rtx) *cost += rtx_cost (op1, SET, 1, speed); @@ -4540,7 +4534,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case MEM: if (speed) - *cost += extra_cost->memory_load; + *cost += extra_cost->ldst.load; return true; @@ -4626,7 +4620,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, speed) + rtx_cost (op1, PLUS, 1, speed)); if (speed) - *cost += extra_cost->int_multiply_extend_add; + *cost += + extra_cost->mult[GET_MODE (x) == DImode].extend_add; return true; } *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) @@ -4634,7 +4629,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, + rtx_cost (op1, PLUS, 1, speed)); if (speed) - *cost += extra_cost->int_multiply_add; + *cost += extra_cost->mult[GET_MODE (x) == DImode].add; } *cost += (rtx_cost (new_op0, PLUS, 0, speed) @@ -4700,7 +4695,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, /* Shifting by a register often takes an extra cycle. 
@@ -5183,7 +5178,7 @@ aarch64_override_options (void)

 /* If the user did not specify a processor, choose the default
 one for them. This will be the CPU set during configuration using
- --with-cpu, otherwise it is "generic". */
+ --with-cpu, otherwise it is "coretex-a53". */
 if (!selected_cpu)
 {
 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
@@ -6042,18 +6037,18 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
 if (count == -1
 || !index
 || !TYPE_MAX_VALUE (index)
- || !host_integerp (TYPE_MAX_VALUE (index), 1)
+ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
 || !TYPE_MIN_VALUE (index)
- || !host_integerp (TYPE_MIN_VALUE (index), 1)
+ || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
 || count < 0)
 return -1;

- count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
- - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+ count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
+ - tree_to_uhwi (TYPE_MIN_VALUE (index)));

 /* There must be no padding. */
- if (!host_integerp (TYPE_SIZE (type), 1)
- || (tree_low_cst (TYPE_SIZE (type), 1)
+ if (!tree_fits_uhwi_p (TYPE_SIZE (type))
+ || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
 != count * GET_MODE_BITSIZE (*modep)))
 return -1;

@@ -6082,8 +6077,8 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
 }

 /* There must be no padding. */
- if (!host_integerp (TYPE_SIZE (type), 1)
- || (tree_low_cst (TYPE_SIZE (type), 1)
+ if (!tree_fits_uhwi_p (TYPE_SIZE (type))
+ || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
 != count * GET_MODE_BITSIZE (*modep)))
 return -1;

@@ -6114,8 +6109,8 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
 }

 /* There must be no padding. */
- if (!host_integerp (TYPE_SIZE (type), 1)
- || (tree_low_cst (TYPE_SIZE (type), 1)
+ if (!tree_fits_uhwi_p (TYPE_SIZE (type))
+ || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
 != count * GET_MODE_BITSIZE (*modep)))
 return -1;

@@ -6875,7 +6870,7 @@ aarch64_simd_attr_length_move (rtx insn)
 static HOST_WIDE_INT
 aarch64_simd_vector_alignment (const_tree type)
 {
- HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
+ HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
 return MIN (align, 128);
 }
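[Editorial note at the end of the aarch64.c hunks.] The host_integerp/tree_low_cst rewrites above come from a tree-wide GCC 4.9 API rename in which the old sign-hint argument is folded into the function name. The correspondence used throughout this file, shown here as a quick reference rather than as part of the patch, is:

host_integerp (t, 1)  ->  tree_fits_uhwi_p (t)
host_integerp (t, 0)  ->  tree_fits_shwi_p (t)
tree_low_cst (t, 1)   ->  tree_to_uhwi (t)
tree_low_cst (t, 0)   ->  tree_to_shwi (t)

That is why the unsigned-hint call sites above become tree_fits_uhwi_p/tree_to_uhwi, while tree_low_cst (TYPE_SIZE (type), 0) in aarch64_simd_vector_alignment becomes tree_to_shwi (TYPE_SIZE (type)).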
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7a80e96385f..228115f50fc 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -443,7 +443,7 @@ enum reg_class
 #define INDEX_REG_CLASS CORE_REGS
 #define BASE_REG_CLASS POINTER_REGS

-/* Register pairs used to eliminate unneeded registers that point intoi
+/* Register pairs used to eliminate unneeded registers that point into
 the stack frame. */
 #define ELIMINABLE_REGS \
 { \
@@ -468,10 +468,10 @@ enum target_cpus
 TARGET_CPU_generic
};

-/* If there is no CPU defined at configure, use "generic" as default. */
+/* If there is no CPU defined at configure, use "cortex-a53" as default. */
 #ifndef TARGET_CPU_DEFAULT
 #define TARGET_CPU_DEFAULT \
- (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6))
+ (TARGET_CPU_cortexa53 | (AARCH64_CPU_DEFAULT_FLAGS << 6))
 #endif

 /* The processor for which instructions should be scheduled. */
@@ -484,7 +484,7 @@ extern enum aarch64_processor aarch64_tune;

 /* Stack layout; function entry, exit and calling. */
 #define STACK_GROWS_DOWNWARD 1

-#define FRAME_GROWS_DOWNWARD 0
+#define FRAME_GROWS_DOWNWARD 1

 #define STARTING_FRAME_OFFSET 0

@@ -789,13 +789,13 @@ do { \
 /* Emit rtl for profiling. Output assembler code to FILE to call
 "_mcount" for profiling a function entry. */
-#define PROFILE_HOOK(LABEL) \
-{ \
- rtx fun,lr; \
- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
-}
+#define PROFILE_HOOK(LABEL) \
+ { \
+ rtx fun, lr; \
+ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ }

 /* All the work done in PROFILE_HOOK, but still required. */
 #define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
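[Editorial note at the end of the aarch64.h hunks.] Flipping FRAME_GROWS_DOWNWARD from 0 to 1 above moves local variables below the saved registers in the frame, which appears to be why the aarch64_initial_elimination_offset hunks earlier in this diff now fold get_frame_size () into the offsets. A worked example with purely hypothetical sizes (all numbers invented for illustration; STACK_BOUNDARY is 128 bits on AArch64, i.e. 16 bytes):

outgoing_args_size = 16, saved_regs_size = 32,
get_frame_size () = 24, fp_lr_offset = 16

elim = 16 + 32 + 24 - 16 = 56
elim = AARCH64_ROUND_UP (56, 16) = 64

So the eliminated register ends up 64 bytes from the stack pointer; before this patch the 24 bytes of locals were not counted in this calculation.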
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 758be47420e..8b3dbd7550e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -117,163 +117,6 @@
 ;; Instruction types and attributes
 ;; -------------------------------------------------------------------

-;; Main data types used by the insntructions
-
-(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF"
- (const_string "unknown"))
-
-(define_attr "mode2" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF"
- (const_string "unknown"))
-
-; The "v8type" attribute is used to for fine grained classification of
-; AArch64 instructions. This table briefly explains the meaning of each type.
-
-; adc add/subtract with carry.
-; adcs add/subtract with carry (setting condition flags).
-; adr calculate address.
-; alu simple alu instruction (no memory or fp regs access).
-; alu_ext simple alu instruction (sign/zero-extended register).
-; alu_shift simple alu instruction, with a source operand shifted by a constant.
-; alus simple alu instruction (setting condition flags).
-; alus_ext simple alu instruction (sign/zero-extended register, setting condition flags).
-; alus_shift simple alu instruction, with a source operand shifted by a constant (setting condition flags).
-; bfm bitfield move operation.
-; branch branch.
-; call subroutine call.
-; ccmp conditional compare.
-; clz count leading zeros/sign bits.
-; csel conditional select.
-; dmb data memory barrier.
-; extend sign/zero-extend (specialised bitfield move).
-; extr extract register-sized bitfield encoding.
-; fpsimd_load load single floating point / simd scalar register from memory.
-; fpsimd_load2 load pair of floating point / simd scalar registers from memory.
-; fpsimd_store store single floating point / simd scalar register to memory.
-; fpsimd_store2 store pair floating point / simd scalar registers to memory.
-; fadd floating point add/sub.
-; fccmp floating point conditional compare.
-; fcmp floating point comparison.
-; fconst floating point load immediate.
-; fcsel floating point conditional select.
-; fcvt floating point convert (float to float).
-; fcvtf2i floating point convert (float to integer).
-; fcvti2f floating point convert (integer to float).
-; fdiv floating point division operation.
-; ffarith floating point abs, neg or cpy.
-; fmadd floating point multiply-add/sub.
-; fminmax floating point min/max.
-; fmov floating point move (float to float).
-; fmovf2i floating point move (float to integer).
-; fmovi2f floating point move (integer to float).
-; fmul floating point multiply.
-; frint floating point round to integral.
-; fsqrt floating point square root.
-; load_acq load-acquire.
-; load load single general register from memory
-; load2 load pair of general registers from memory
-; logic logical operation (register).
-; logic_imm and/or/xor operation (immediate).
-; logic_shift logical operation with shift.
-; logics logical operation (register, setting condition flags).
-; logics_imm and/or/xor operation (immediate, setting condition flags).
-; logics_shift logical operation with shift (setting condition flags).
-; madd integer multiply-add/sub.
-; maddl widening integer multiply-add/sub.
-; misc miscellaneous - any type that doesn't fit into the rest.
-; move integer move operation.
-; move2 double integer move operation.
-; movk move 16-bit immediate with keep.
-; movz move 16-bit immmediate with zero/one.
-; mrs system/special register move.
-; mulh 64x64 to 128-bit multiply (high part).
-; mull widening multiply.
-; mult integer multiply instruction.
-; prefetch memory prefetch.
-; rbit reverse bits.
-; rev reverse bytes.
-; sdiv integer division operation (signed).
-; shift variable shift operation.
-; shift_imm immediate shift operation (specialised bitfield move).
-; store_rel store-release.
-; store store single general register to memory.
-; store2 store pair of general registers to memory.
-; udiv integer division operation (unsigned).
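[Editorial note mid-deletion.] Everything from here to the end of this file's hunks repeats one mechanical transformation: the AArch64-local v8type, mode, simd_type and simd_mode attributes are dropped in favour of the single type classification shared with the 32-bit ARM backend via ../arm/types.md, which is what lets the cortex-a53/cortex-a15 pipeline models included below be reused. Schematically, taken from the jump pattern further down (an editorial illustration, not an extra hunk):

;; before: per-backend classification plus duplicated attributes
 [(set_attr "v8type" "branch")
  (set_attr "type" "branch")]

;; after: the shared "type" attribute alone drives scheduling
 [(set_attr "type" "branch")]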
-
-(define_attr "v8type"
- "adc,\
- adcs,\
- adr,\
- alu,\
- alu_ext,\
- alu_shift,\
- alus,\
- alus_ext,\
- alus_shift,\
- bfm,\
- branch,\
- call,\
- ccmp,\
- clz,\
- csel,\
- dmb,\
- div,\
- div64,\
- extend,\
- extr,\
- fpsimd_load,\
- fpsimd_load2,\
- fpsimd_store2,\
- fpsimd_store,\
- fadd,\
- fccmp,\
- fcvt,\
- fcvtf2i,\
- fcvti2f,\
- fcmp,\
- fconst,\
- fcsel,\
- fdiv,\
- ffarith,\
- fmadd,\
- fminmax,\
- fmov,\
- fmovf2i,\
- fmovi2f,\
- fmul,\
- frint,\
- fsqrt,\
- load_acq,\
- load1,\
- load2,\
- logic,\
- logic_imm,\
- logic_shift,\
- logics,\
- logics_imm,\
- logics_shift,\
- madd,\
- maddl,\
- misc,\
- move,\
- move2,\
- movk,\
- movz,\
- mrs,\
- mulh,\
- mull,\
- mult,\
- prefetch,\
- rbit,\
- rev,\
- sdiv,\
- shift,\
- shift_imm,\
- store_rel,\
- store1,\
- store2,\
- udiv"
- (const_string "alu"))
-
 ; The "type" attribute is is included here from AArch32 backend to be able
 ; to share pipeline descriptions.
 (include "../arm/types.md")
@@ -312,15 +155,13 @@
 (define_attr "generic_sched" "yes,no"
 (const (if_then_else
- (eq_attr "tune" "large,small,cortexa53")
+ (eq_attr "tune" "cortexa53,cortexa15")
 (const_string "no")
 (const_string "yes"))))

 ;; Scheduling
-(include "aarch64-generic.md")
-(include "large.md")
-(include "small.md")
 (include "../arm/cortex-a53.md")
+(include "../arm/cortex-a15.md")

 ;; -------------------------------------------------------------------
 ;; Jumps and other miscellaneous insns
 ;; -------------------------------------------------------------------

 (define_insn "indirect_jump"
 [(set (pc) (match_operand:DI 0 "register_operand" "r"))]
 ""
 "br\\t%0"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

 (define_insn "jump"
 [(set (pc) (label_ref (match_operand 0 "" "")))]
 ""
 "b\\t%l0"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

 (define_expand "cbranch<mode>4"
@@ -377,8 +216,7 @@
 (pc)))]
 ""
 "b%m0\\t%l2"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

 (define_expand "casesi"
@@ -442,7 +280,6 @@
 return aarch64_output_casesi (operands);
 "
 [(set_attr "length" "16")
- (set_attr "v8type" "branch")
 (set_attr "type" "branch")]
 )

@@ -450,7 +287,7 @@
 [(unspec[(const_int 0)] UNSPEC_NOP)]
 ""
 "nop"
- [(set_attr "v8type" "misc")]
+ [(set_attr "type" "no_insn")]
 )

 (define_expand "prologue"
@@ -484,8 +321,7 @@
 [(return)]
 ""
 "ret"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

 (define_insn "eh_return"
@@ -493,8 +329,7 @@
 UNSPECV_EH_RETURN)]
 ""
 "#"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

@@ -515,8 +350,7 @@
 (pc)))]
 ""
 "<cbz>\\t%<w>0, %l1"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

@@ -535,9 +369,7 @@
 return \"ubfx\\t%<w>3, %<w>0, %1, #1\;<cbz>\\t%<w>3, %l2\";
 return \"<tbz>\\t%<w>0, %1, %l2\";
 "
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")
- (set_attr "mode" "<MODE>")
+ [(set_attr "type" "branch")
 (set (attr "length")
 (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
 (lt (minus (match_dup 2) (pc)) (const_int 32764)))
@@ -557,9 +389,7 @@
 return \"ubfx\\t%<w>2, %<w>0, <sizem1>, #1\;<cbz>\\t%<w>2, %l1\";
 return \"<tbz>\\t%<w>0, <sizem1>, %l1\";
 "
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")
- (set_attr "mode" "<MODE>")
+ [(set_attr "type" "branch")
 (set (attr "length")
 (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
 (lt (minus (match_dup 1) (pc)) (const_int 32764)))
@@ -603,8 +433,7 @@
 (clobber (reg:DI LR_REGNUM))]
 ""
 "blr\\t%0"
- [(set_attr "v8type" "call")
- (set_attr "type" "call")]
+ [(set_attr "type" "call")]
 )

 (define_insn "*call_symbol"
@@ -615,8 +444,7 @@
 "GET_CODE (operands[0]) == SYMBOL_REF
 && !aarch64_is_long_call_p (operands[0])"
 "bl\\t%a0"
- [(set_attr "v8type" "call")
- (set_attr "type" "call")]
+ [(set_attr "type" "call")]
 )

 (define_expand "call_value"
@@ -653,8 +481,7 @@
 (clobber (reg:DI LR_REGNUM))]
 ""
 "blr\\t%1"
- [(set_attr "v8type" "call")
- (set_attr "type" "call")]
+ [(set_attr "type" "call")]
 )

@@ -667,8 +494,7 @@
 "GET_CODE (operands[1]) == SYMBOL_REF
 && !aarch64_is_long_call_p (operands[1])"
 "bl\\t%a1"
- [(set_attr "v8type" "call")
- (set_attr "type" "call")]
+ [(set_attr "type" "call")]
 )

 (define_expand "sibcall"
@@ -703,8 +529,7 @@
 (use (match_operand 2 "" ""))]
 "GET_CODE (operands[0]) == SYMBOL_REF"
 "b\\t%a0"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

@@ -716,8 +541,7 @@
 (use (match_operand 3 "" ""))]
 "GET_CODE (operands[1]) == SYMBOL_REF"
 "b\\t%a1"
- [(set_attr "v8type" "branch")
- (set_attr "type" "branch")]
+ [(set_attr "type" "branch")]
 )

 ;; Call subroutine returning any type.
@@ -794,12 +618,9 @@
 gcc_unreachable ();
 }
 }
- [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
- (set_attr "type" "mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,*,*,*")
- (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
- (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
- (set_attr "mode" "<MODE>")
- (set_attr "simd_mode" "<MODE>")]
+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+ neon_from_gp<q>,neon_from_gp<q>, neon_dup")
+ (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")]
 )

 (define_expand "mov<mode>"
@@ -837,10 +658,8 @@
 fmov\\t%s0, %w1
 fmov\\t%w0, %s1
 fmov\\t%s0, %s1"
- [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov")
- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
 adr,adr,fmov,fmov,fmov")
- (set_attr "mode" "SI")
 (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
 )

@@ -864,10 +683,8 @@
 fmov\\t%x0, %d1
 fmov\\t%d0, %d1
 movi\\t%d0, %1"
- [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
 adr,adr,fmov,fmov,fmov,fmov")
- (set_attr "mode" "DI")
 (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
 (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )

@@ -880,9 +697,7 @@
 "UINTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
 && UINTVAL (operands[1]) % 16 == 0"
 "movk\\t%<w>0, %X2, lsl %1"
- [(set_attr "v8type" "movk")
- (set_attr "type" "mov_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "mov_imm")]
 )

 (define_expand "movti"
@@ -912,15 +727,12 @@
 stp\\txzr, xzr, %0
 ldr\\t%q0, %1
 str\\t%q1, %0"
- [(set_attr "v8type" "move2,fmovi2f,fmovf2i,*, \
- load2,store2,store2,fpsimd_load,fpsimd_store")
- (set_attr "type" "multiple,f_mcr,f_mrc,*, \
+ [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
 load2,store2,store2,f_loadd,f_stored")
- (set_attr "simd_type" "*,*,*,simd_move,*,*,*,*,*")
- (set_attr "mode" "DI,DI,DI,TI,DI,DI,DI,TI,TI")
 (set_attr "length" "8,8,8,4,4,4,4,4,4")
- (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")
- (set_attr "simd" "*,*,*,yes,*,*,*,*,*")])
+ (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
+ (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
+)

 ;; Split a TImode register-register or register-immediate move into
 ;; its component DImode pieces, taking care to handle overlapping
@@ -966,12 +778,8 @@
 ldr\\t%w0, %1
 str\\t%w1, %0
 mov\\t%w0, %w1"
- [(set_attr "v8type" "fmovi2f,fmovf2i,\
- fmov,fconst,fpsimd_load,\
- fpsimd_store,fpsimd_load,fpsimd_store,fmov")
- (set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
- f_loads,f_stores,f_loads,f_stores,fmov")
- (set_attr "mode" "SF")]
+ [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
+ f_loads,f_stores,f_loads,f_stores,fmov")]
 )

 (define_insn "*movdf_aarch64"
@@ -989,12 +797,8 @@
 ldr\\t%x0, %1
 str\\t%x1, %0
 mov\\t%x0, %x1"
- [(set_attr "v8type" "fmovi2f,fmovf2i,\
- fmov,fconst,fpsimd_load,\
- fpsimd_store,fpsimd_load,fpsimd_store,move")
- (set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
- f_loadd,f_stored,f_loadd,f_stored,mov_reg")
- (set_attr "mode" "DF")]
+ [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
+ f_loadd,f_stored,f_loadd,f_stored,mov_reg")]
 )

 (define_expand "movtf"
@@ -1031,10 +835,8 @@
 str\\t%q1, %0
 ldp\\t%0, %H0, %1
 stp\\t%1, %H1, %0"
- [(set_attr "v8type" "logic,move2,fmovi2f,fmovf2i,fconst,fconst,fpsimd_load,fpsimd_store,fpsimd_load2,fpsimd_store2")
- (set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\
+ [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\
 f_loadd,f_stored,neon_load1_2reg,neon_store1_2reg")
- (set_attr "mode" "DF,DF,DF,DF,DF,DF,TF,TF,DF,DF")
 (set_attr "length" "4,8,8,8,4,4,4,4,4,4")
 (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*")
 (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")]
@@ -1063,9 +865,7 @@
 XEXP (operands[1], 0),
 GET_MODE_SIZE (<MODE>mode)))"
 "ldp\\t%<w>0, %<w>2, %1"
- [(set_attr "v8type" "load2")
- (set_attr "type" "load2")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "load2")]
 )

 ;; Operands 0 and 2 are tied together by the final condition; so we allow
@@ -1080,9 +880,7 @@
 XEXP (operands[0], 0),
 GET_MODE_SIZE (<MODE>mode)))"
 "stp\\t%<w>1, %<w>3, %0"
- [(set_attr "v8type" "store2")
- (set_attr "type" "store2")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "store2")]
 )

 ;; Operands 1 and 3 are tied together by the final condition; so we allow
@@ -1097,9 +895,7 @@
 XEXP (operands[1], 0),
 GET_MODE_SIZE (<MODE>mode)))"
 "ldp\\t%<w>0, %<w>2, %1"
- [(set_attr "v8type" "fpsimd_load2")
- (set_attr "type" "neon_load1_2reg<q>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "neon_load1_2reg<q>")]
 )

 ;; Operands 0 and 2 are tied together by the final condition; so we allow
@@ -1114,9 +910,7 @@
 XEXP (operands[0], 0),
 GET_MODE_SIZE (<MODE>mode)))"
 "stp\\t%<w>1, %<w>3, %0"
- [(set_attr "v8type" "fpsimd_store2")
- (set_attr "type" "neon_store1_2reg<q>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "neon_store1_2reg<q>")]
 )

 ;; Load pair with writeback. This is primarily used in function epilogues
@@ -1134,9 +928,7 @@
 (match_operand:P 5 "const_int_operand" "n"))))])]
 "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
 "ldp\\t%<w>2, %<w>3, [%1], %4"
- [(set_attr "v8type" "load2")
- (set_attr "type" "load2")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "load2")]
 )

 ;; Store pair with writeback. This is primarily used in function prologues
@@ -1154,9 +946,7 @@
 (match_operand:GPI 3 "register_operand" "r"))])]
 "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
 "stp\\t%<w>2, %<w>3, [%0, %4]!"
- [(set_attr "v8type" "store2")
- (set_attr "type" "store2")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "store2")]
 )

 ;; -------------------------------------------------------------------
@@ -1176,9 +966,7 @@
 "@
 sxtw\t%0, %w1
 ldrsw\t%0, %1"
- [(set_attr "v8type" "extend,load1")
- (set_attr "type" "extend,load1")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "extend,load1")]
 )

 (define_insn "*zero_extendsidi2_aarch64"
@@ -1188,9 +976,7 @@
 "@
 uxtw\t%0, %w1
 ldr\t%w0, %1"
- [(set_attr "v8type" "extend,load1")
- (set_attr "type" "extend,load1")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "extend,load1")]
 )

 (define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
@@ -1206,9 +992,7 @@
 "@
 sxt<SHORT:size>\t%<GPI:w>0, %w1
 ldrs<SHORT:size>\t%<GPI:w>0, %1"
- [(set_attr "v8type" "extend,load1")
- (set_attr "type" "extend,load1")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "extend,load1")]
 )

 (define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
@@ -1219,9 +1003,7 @@
 uxt<SHORT:size>\t%<GPI:w>0, %w1
 ldr<SHORT:size>\t%w0, %1
 ldr\t%<SHORT:size>0, %1"
- [(set_attr "v8type" "extend,load1,load1")
- (set_attr "type" "extend,load1,load1")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "extend,load1,load1")]
 )

 (define_expand "<optab>qihi2"
@@ -1237,9 +1019,7 @@
 "@
 <su>xtb\t%w0, %w1
 <ldrxt>b\t%w0, %1"
- [(set_attr "v8type" "extend,load1")
- (set_attr "type" "extend,load1")
- (set_attr "mode" "HI")]
+ [(set_attr "type" "extend,load1")]
 )

 ;; -------------------------------------------------------------------
@@ -1282,9 +1062,7 @@
 add\\t%w0, %w1, %2
 add\\t%w0, %w1, %w2
 sub\\t%w0, %w1, #%n2"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_imm,alu_reg,alu_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_imm,alu_reg,alu_imm")]
 )

 ;; zero_extend version of above
@@ -1299,9 +1077,7 @@
 add\\t%w0, %w1, %2
 add\\t%w0, %w1, %w2
 sub\\t%w0, %w1, #%n2"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_imm,alu_reg,alu_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_imm,alu_reg,alu_imm")]
 )

 (define_insn "*adddi3_aarch64"
@@ -1316,9 +1092,7 @@
 add\\t%x0, %x1, %x2
 sub\\t%x0, %x1, #%n2
 add\\t%d0, %d1, %d2"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg")
- (set_attr "mode" "DI")
+ [(set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg")
 (set_attr "simd" "*,*,*,yes")]
 )

@@ -1335,9 +1109,7 @@
 adds\\t%<w>0, %<w>1, %<w>2
 adds\\t%<w>0, %<w>1, %<w>2
 subs\\t%<w>0, %<w>1, #%n2"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg,alus_imm,alus_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_reg,alus_imm,alus_imm")]
 )

 ;; zero_extend version of above
@@ -1354,9 +1126,7 @@
 adds\\t%w0, %w1, %w2
 adds\\t%w0, %w1, %w2
 subs\\t%w0, %w1, #%n2"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg,alus_imm,alus_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alus_reg,alus_imm,alus_imm")]
 )

 (define_insn "*adds_mul_imm_<mode>"
@@ -1372,9 +1142,7 @@
 (match_dup 3)))]
 ""
 "adds\\t%<w>0, %<w>3, %<w>1, lsl %p2"
- [(set_attr "v8type" "alus_shift")
- (set_attr "type" "alus_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_shift_imm")]
 )

 (define_insn "*subs_mul_imm_<mode>"
@@ -1390,9 +1158,7 @@
 (mult:GPI (match_dup 2) (match_dup 3))))]
 ""
 "subs\\t%<w>0, %<w>1, %<w>2, lsl %p3"
- [(set_attr "v8type" "alus_shift")
- (set_attr "type" "alus_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_shift_imm")]
 )

 (define_insn "*adds_<optab><ALLX:mode>_<GPI:mode>"
@@ -1406,9 +1172,7 @@
 (plus:GPI (ANY_EXTEND:GPI (match_dup 1))
 (match_dup 2)))]
 ""
 "adds\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>"
- [(set_attr "v8type" "alus_ext")
- (set_attr "type" "alus_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alus_ext")]
 )

 (define_insn "*subs_<optab><ALLX:mode>_<GPI:mode>"
@@ -1422,9 +1186,7 @@
 (minus:GPI (match_dup 1)
 (ANY_EXTEND:GPI (match_dup 2))))]
 ""
 "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>"
- [(set_attr "v8type" "alus_ext")
- (set_attr "type" "alus_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alus_ext")]
 )

 (define_insn "*adds_<optab><mode>_multp2"
@@ -1444,9 +1206,7 @@
 (match_dup 4)))]
 "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
 "adds\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
- [(set_attr "v8type" "alus_ext")
- (set_attr "type" "alus_ext")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_ext")]
 )

 (define_insn "*subs_<optab><mode>_multp2"
@@ -1466,9 +1226,7 @@
 (const_int 0))))]
 "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
 "subs\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
- [(set_attr "v8type" "alus_ext")
- (set_attr "type" "alus_ext")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_ext")]
 )

 (define_insn "*add<mode>3nr_compare0"
@@ -1482,9 +1240,7 @@
 cmn\\t%<w>0, %<w>1
 cmn\\t%<w>0, %<w>1
 cmp\\t%<w>0, #%n1"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg,alus_imm,alus_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_reg,alus_imm,alus_imm")]
 )

 (define_insn "*compare_neg<mode>"
@@ -1494,9 +1250,7 @@
 (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "cmn\\t%<w>1, %<w>0"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_reg")]
 )

 (define_insn "*add_<shift>_<mode>"
@@ -1506,9 +1260,7 @@
 (match_operand:GPI 3 "register_operand" "r")))]
 ""
 "add\\t%<w>0, %<w>3, %<w>1, <shift> %2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 ;; zero_extend version of above
@@ -1516,13 +1268,11 @@
 [(set (match_operand:DI 0 "register_operand" "=r")
 (zero_extend:DI (plus:SI (ASHIFT:SI (match_operand:SI 1 "register_operand" "r")
- (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
- (match_operand:SI 3 "register_operand" "r"))))]
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
+ (match_operand:SI 3 "register_operand" "r"))))]
 ""
 "add\\t%w0, %w3, %w1, <shift> %2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 (define_insn "*add_mul_imm_<mode>"
@@ -1532,9 +1282,7 @@
 (match_operand:GPI 3 "register_operand" "r")))]
 ""
 "add\\t%<w>0, %<w>3, %<w>1, lsl %p2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 (define_insn "*add_<optab><ALLX:mode>_<GPI:mode>"
@@ -1543,9 +1291,7 @@
 (match_operand:GPI 2 "register_operand" "r")))]
 ""
 "add\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -1556,9 +1302,7 @@
 (match_operand:GPI 2 "register_operand" "r"))))]
 ""
 "add\\t%w0, %w2, %w1, <su>xt<SHORT:size>"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "*add_<optab><ALLX:mode>_shft_<GPI:mode>"
@@ -1569,9 +1313,7 @@
 (match_operand:GPI 3 "register_operand" "r")))]
 ""
 "add\\t%<GPI:w>0, %<GPI:w>3, %<GPI:w>1, <su>xt<ALLX:size> %2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -1584,9 +1326,7 @@
 (match_operand:SI 3 "register_operand" "r"))))]
 ""
 "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "*add_<optab><ALLX:mode>_mult_<GPI:mode>"
@@ -1597,9 +1337,7 @@
 (match_operand:GPI 3 "register_operand" "r")))]
 ""
 "add\\t%<GPI:w>0, %<GPI:w>3, %<GPI:w>1, <su>xt<ALLX:size> %p2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -1611,9 +1349,7 @@
 (match_operand:SI 3 "register_operand" "r"))))]
 ""
 "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %p2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "*add_<optab><mode>_multp2"
@@ -1626,9 +1362,7 @@
 (match_operand:GPI 4 "register_operand" "r")))]
 "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
 "add\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -1643,9 +1377,7 @@
 (match_operand:SI 4 "register_operand" "r"))))]
 "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
 "add\\t%w0, %w4, %w1, <su>xt%e3 %p2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "*add<mode>3_carryin"
@@ -1657,9 +1389,7 @@
 (match_operand:GPI 2 "register_operand" "r"))))]
 ""
 "adc\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "adc_reg")]
 )

 ;; zero_extend version of above
@@ -1673,9 +1403,7 @@
 (match_operand:SI 2 "register_operand" "r")))))]
 ""
 "adc\\t%w0, %w1, %w2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "adc_reg")]
 )

 (define_insn "*add<mode>3_carryin_alt1"
@@ -1687,9 +1415,7 @@
 (geu:GPI (reg:CC CC_REGNUM) (const_int 0))))]
 ""
 "adc\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "adc_reg")]
 )

 ;; zero_extend version of above
@@ -1703,9 +1429,7 @@
 (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))]
 ""
 "adc\\t%w0, %w1, %w2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "adc_reg")]
 )

 (define_insn "*add<mode>3_carryin_alt2"
@@ -1717,9 +1441,7 @@
 (match_operand:GPI 2 "register_operand" "r")))]
 ""
 "adc\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "adc_reg")]
 )

 ;; zero_extend version of above
@@ -1733,9 +1455,7 @@
 (match_operand:SI 2 "register_operand" "r"))))]
 ""
 "adc\\t%w0, %w1, %w2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "adc_reg")]
 )

 (define_insn "*add<mode>3_carryin_alt3"
@@ -1747,9 +1467,7 @@
 (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "adc\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "adc_reg")]
 )

 ;; zero_extend version of above
@@ -1763,9 +1481,7 @@
 (match_operand:SI 1 "register_operand" "r"))))]
 ""
 "adc\\t%w0, %w1, %w2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "adc_reg")]
 )

 (define_insn "*add_uxt<mode>_multp2"
@@ -1780,9 +1496,7 @@
 operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
 INTVAL (operands[3])));
 return \"add\t%<w>0, %<w>4, %<w>1, uxt%e3 %p2\";"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -1799,9 +1513,7 @@
 operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
 INTVAL (operands[3])));
 return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "subsi3"
@@ -1810,9 +1522,7 @@
 (match_operand:SI 2 "register_operand" "r")))]
 ""
 "sub\\t%w0, %w1, %w2"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_reg")]
 )

 ;; zero_extend version of above
@@ -1823,9 +1533,7 @@
 (match_operand:SI 2 "register_operand" "r"))))]
 ""
 "sub\\t%w0, %w1, %w2"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_reg")]
 )

 (define_insn "subdi3"
@@ -1836,9 +1544,7 @@
 "@
 sub\\t%x0, %x1, %x2
 sub\\t%d0, %d1, %d2"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_reg")
- (set_attr "mode" "DI")
+ [(set_attr "type" "alu_reg, neon_sub")
 (set_attr "simd" "*,yes")]
 )

@@ -1852,9 +1558,7 @@
 (minus:GPI (match_dup 1) (match_dup 2)))]
 ""
 "subs\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_reg")]
 )

 ;; zero_extend version of above
@@ -1867,9 +1571,7 @@
 (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))]
 ""
 "subs\\t%w0, %w1, %w2"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alus_reg")]
 )

 (define_insn "*sub_<shift>_<mode>"
@@ -1880,9 +1582,7 @@
 (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))))]
 ""
 "sub\\t%<w>0, %<w>3, %<w>1, <shift> %2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 ;; zero_extend version of above
@@ -1895,9 +1595,7 @@
 (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
 ""
 "sub\\t%w0, %w3, %w1, <shift> %2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 (define_insn "*sub_mul_imm_<mode>"
@@ -1908,9 +1606,7 @@
 (match_operand:QI 2 "aarch64_pwr_2_<mode>" "n"))))]
 ""
 "sub\\t%<w>0, %<w>3, %<w>1, lsl %p2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 ;; zero_extend version of above
@@ -1923,9 +1619,7 @@
 (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
 ""
 "sub\\t%w0, %w3, %w1, lsl %p2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 (define_insn "*sub_<optab><ALLX:mode>_<GPI:mode>"
@@ -1935,9 +1629,7 @@
 (match_operand:ALLX 2 "register_operand" "r"))))]
 ""
 "sub\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -1949,9 +1641,7 @@
 (match_operand:SHORT 2 "register_operand" "r")))))]
 ""
 "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size>"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "*sub_<optab><ALLX:mode>_shft_<GPI:mode>"
@@ -1962,9 +1652,7 @@
 (match_operand 3 "aarch64_imm3" "Ui3"))))]
 ""
 "sub\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size> %3"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -1977,9 +1665,7 @@
 (match_operand 3 "aarch64_imm3" "Ui3")))))]
 ""
 "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size> %3"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "*sub_<optab><mode>_multp2"
@@ -1992,9 +1678,7 @@
 (const_int 0))))]
 "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])"
 "sub\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -2009,9 +1693,7 @@
 (const_int 0)))))]
 "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
 "sub\\t%w0, %w4, %w1, <su>xt%e3 %p2"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn "*sub<mode>3_carryin"
@@ -2023,9 +1705,7 @@
 (match_operand:GPI 2 "register_operand" "r")))]
 ""
 "sbc\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "adc_reg")]
 )

 ;; zero_extend version of the above
@@ -2039,9 +1719,7 @@
 (match_operand:SI 2 "register_operand" "r"))))]
 ""
 "sbc\\t%w0, %w1, %w2"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "adc_reg")]
 )

 (define_insn "*sub_uxt<mode>_multp2"
@@ -2056,9 +1734,7 @@
 operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
 INTVAL (operands[3])));
 return \"sub\t%<w>0, %<w>4, %<w>1, uxt%e3 %p2\";"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_ext")]
 )

 ;; zero_extend version of above
@@ -2075,9 +1751,7 @@
 operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
 INTVAL (operands[3])));
 return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";"
- [(set_attr "v8type" "alu_ext")
- (set_attr "type" "alu_ext")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_ext")]
 )

 (define_insn_and_split "absdi2"
@@ -2108,9 +1782,7 @@
 GEN_INT (63)))));
 DONE;
 }
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_reg")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "alu_reg")]
 )

 (define_insn "neg<mode>2"
@@ -2120,12 +1792,8 @@
 "@
 neg\\t%<w>0, %<w>1
 neg\\t%<rtn>0<vas>, %<rtn>1<vas>"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_reg")
- (set_attr "simd_type" "*,simd_negabs")
- (set_attr "simd" "*,yes")
- (set_attr "mode" "<MODE>")
- (set_attr "simd_mode" "<MODE>")]
+ [(set_attr "type" "alu_reg, neon_neg<q>")
+ (set_attr "simd" "*,yes")]
 )

 ;; zero_extend version of above
@@ -2134,9 +1802,7 @@
 (zero_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))]
 ""
 "neg\\t%w0, %w1"
- [(set_attr "v8type" "alu")
- (set_attr "type" "alu_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_reg")]
 )

 (define_insn "*ngc<mode>"
@@ -2145,9 +1811,7 @@
 (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "ngc\\t%<w>0, %<w>1"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "adc_reg")]
 )

 (define_insn "*ngcsi_uxtw"
@@ -2157,9 +1821,7 @@
 (match_operand:SI 1 "register_operand" "r"))))]
 ""
 "ngc\\t%w0, %w1"
- [(set_attr "v8type" "adc")
- (set_attr "type" "adc_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "adc_reg")]
 )

 (define_insn "*neg<mode>2_compare0"
@@ -2170,9 +1832,7 @@
 (neg:GPI (match_dup 1)))]
 ""
 "negs\\t%<w>0, %<w>1"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_reg")]
 )

 ;; zero_extend version of above
@@ -2184,9 +1844,7 @@
 (zero_extend:DI (neg:SI (match_dup 1))))]
 ""
 "negs\\t%w0, %w1"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alus_reg")]
 )

 (define_insn "*neg_<shift><mode>3_compare0"
@@ -2200,9 +1858,7 @@
 (neg:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))))]
 ""
 "negs\\t%<w>0, %<w>1, <shift> %2"
- [(set_attr "v8type" "alus_shift")
- (set_attr "type" "alus_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_shift_imm")]
 )

 (define_insn "*neg_<shift>_<mode>2"
@@ -2212,9 +1868,7 @@
 (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))))]
 ""
 "neg\\t%<w>0, %<w>1, <shift> %2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 ;; zero_extend version of above
@@ -2226,9 +1880,7 @@
 (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
 ""
 "neg\\t%w0, %w1, <shift> %2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 (define_insn "*neg_mul_imm_<mode>2"
@@ -2238,9 +1890,7 @@
 (match_operand:QI 2 "aarch64_pwr_2_<mode>" "n"))))]
 ""
 "neg\\t%<w>0, %<w>1, lsl %p2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 ;; zero_extend version of above
@@ -2252,9 +1902,7 @@
 (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
 ""
 "neg\\t%w0, %w1, lsl %p2"
- [(set_attr "v8type" "alu_shift")
- (set_attr "type" "alu_shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "alu_shift_imm")]
 )

 (define_insn "mul<mode>3"
@@ -2263,9 +1911,7 @@
 (match_operand:GPI 2 "register_operand" "r")))]
 ""
 "mul\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "mult")
- (set_attr "type" "mul")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "mul")]
 )

 ;; zero_extend version of above
@@ -2276,9 +1922,7 @@
 (match_operand:SI 2 "register_operand" "r"))))]
 ""
 "mul\\t%w0, %w1, %w2"
- [(set_attr "v8type" "mult")
- (set_attr "type" "mul")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "mul")]
 )

 (define_insn "*madd<mode>"
@@ -2288,9 +1932,7 @@
 (match_operand:GPI 3 "register_operand" "r")))]
 ""
 "madd\\t%<w>0, %<w>1, %<w>2, %<w>3"
- [(set_attr "v8type" "madd")
- (set_attr "type" "mla")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "mla")]
 )

 ;; zero_extend version of above
@@ -2302,9 +1944,7 @@
 (match_operand:SI 3 "register_operand" "r"))))]
 ""
 "madd\\t%w0, %w1, %w2, %w3"
- [(set_attr "v8type" "madd")
- (set_attr "type" "mla")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "mla")]
 )

 (define_insn "*msub<mode>"
@@ -2315,9 +1955,7 @@
 ""
 "msub\\t%<w>0, %<w>1, %<w>2, %<w>3"
- [(set_attr "v8type" "madd")
- (set_attr "type" "mla")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "mla")]
 )

 ;; zero_extend version of above
@@ -2330,9 +1968,7 @@
 ""
 "msub\\t%w0, %w1, %w2, %w3"
- [(set_attr "v8type" "madd")
- (set_attr "type" "mla")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "mla")]
 )

 (define_insn "*mul<mode>_neg"
@@ -2342,9 +1978,7 @@
 ""
 "mneg\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "mult")
- (set_attr "type" "mul")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "mul")]
 )

 ;; zero_extend version of above
@@ -2356,9 +1990,7 @@
 ""
 "mneg\\t%w0, %w1, %w2"
- [(set_attr "v8type" "mult")
- (set_attr "type" "mul")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "mul")]
 )

 (define_insn "<su_optab>mulsidi3"
@@ -2367,9 +1999,7 @@
 (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))))]
 ""
 "<su>mull\\t%0, %w1, %w2"
- [(set_attr "v8type" "mull")
- (set_attr "type" "<su>mull")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "<su>mull")]
 )

 (define_insn "<su_optab>maddsidi4"
@@ -2380,9 +2010,7 @@
 (match_operand:DI 3 "register_operand" "r")))]
 ""
 "<su>maddl\\t%0, %w1, %w2, %3"
- [(set_attr "v8type" "maddl")
- (set_attr "type" "<su>mlal")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "<su>mlal")]
 )

 (define_insn "<su_optab>msubsidi4"
@@ -2394,9 +2022,7 @@
 (match_operand:SI 2 "register_operand" "r")))))]
 ""
 "<su>msubl\\t%0, %w1, %w2, %3"
- [(set_attr "v8type" "maddl")
- (set_attr "type" "<su>mlal")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "<su>mlal")]
 )

 (define_insn "*<su_optab>mulsidi_neg"
@@ -2406,9 +2032,7 @@
 (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))))]
 ""
 "<su>mnegl\\t%0, %w1, %w2"
- [(set_attr "v8type" "mull")
- (set_attr "type" "<su>mull")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "<su>mull")]
 )

 (define_insn "<su>muldi3_highpart"
@@ -2421,9 +2045,7 @@
 (const_int 64))))]
 ""
 "<su>mulh\\t%0, %1, %2"
- [(set_attr "v8type" "mulh")
- (set_attr "type" "<su>mull")
- (set_attr "mode" "DI")]
+ [(set_attr "type" "<su>mull")]
 )

 (define_insn "<su_optab>div<mode>3"
@@ -2432,9 +2054,7 @@
 (match_operand:GPI 2 "register_operand" "r")))]
 ""
 "<su>div\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "<su>div")
- (set_attr "type" "<su>div")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "<su>div")]
 )

 ;; zero_extend version of above
@@ -2445,9 +2065,7 @@
 (match_operand:SI 2 "register_operand" "r"))))]
 ""
 "<su>div\\t%w0, %w1, %w2"
- [(set_attr "v8type" "<su>div")
- (set_attr "type" "<su>div")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "<su>div")]
 )

 ;; -------------------------------------------------------------------
@@ -2463,9 +2081,7 @@
 cmp\\t%<w>0, %<w>1
 cmp\\t%<w>0, %<w>1
 cmn\\t%<w>0, #%n1"
- [(set_attr "v8type" "alus")
- (set_attr "type" "alus_reg,alus_imm,alus_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_reg,alus_imm,alus_imm")]
 )

 (define_insn "*cmp<mode>"
@@ -2476,9 +2092,7 @@
 "@
 fcmp\\t%<s>0, #0.0
 fcmp\\t%<s>0, %<s>1"
- [(set_attr "v8type" "fcmp")
- (set_attr "type" "fcmp<s>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "fcmp<s>")]
 )

 (define_insn "*cmpe<mode>"
@@ -2489,9 +2103,7 @@
 "@
 fcmpe\\t%<s>0, #0.0
 fcmpe\\t%<s>0, %<s>1"
- [(set_attr "v8type" "fcmp")
- (set_attr "type" "fcmp<s>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "fcmp<s>")]
 )

 (define_insn "*cmp_swp_<shift>_reg<mode>"
@@ -2502,9 +2114,7 @@
 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))]
 ""
 "cmp\\t%<w>2, %<w>0, <shift> %1"
- [(set_attr "v8type" "alus_shift")
- (set_attr "type" "alus_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "alus_shift_imm")]
 )

 (define_insn "*cmp_swp_<optab><ALLX:mode>_reg<GPI:mode>"
@@ -2514,9 +2124,7 @@
 (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "cmp\\t%<GPI:w>1, %<GPI:w>0, <su>xt<ALLX:size>"
- [(set_attr "v8type" "alus_ext")
- (set_attr "type" "alus_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alus_ext")]
 )

 (define_insn "*cmp_swp_<optab><ALLX:mode>_shft_<GPI:mode>"
@@ -2528,9 +2136,7 @@
 (match_operand:GPI 2 "register_operand" "r")))]
 ""
 "cmp\\t%<GPI:w>2, %<GPI:w>0, <su>xt<ALLX:size> %1"
- [(set_attr "v8type" "alus_ext")
- (set_attr "type" "alus_ext")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "alus_ext")]
 )

 ;; -------------------------------------------------------------------
@@ -2569,9 +2175,7 @@
 [(match_operand 2 "cc_register" "") (const_int 0)]))]
 ""
 "cset\\t%<w>0, %m1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "csel")]
 )

 ;; zero_extend version of the above
@@ -2582,9 +2186,7 @@
 [(match_operand 2 "cc_register" "") (const_int 0)])))]
 ""
 "cset\\t%w0, %m1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "csel")]
 )

 (define_insn "cstore<mode>_neg"
@@ -2593,9 +2195,7 @@
 [(match_operand 2 "cc_register" "") (const_int 0)])))]
 ""
 "csetm\\t%<w>0, %m1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "csel")]
 )

 ;; zero_extend version of the above
@@ -2606,9 +2206,7 @@
 [(match_operand 2 "cc_register" "") (const_int 0)]))))]
 ""
 "csetm\\t%w0, %m1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "csel")]
 )

 (define_expand "cmov<mode>6"
@@ -2661,9 +2259,7 @@
 csinc\\t%<w>0, %<w>4, <w>zr, %M1
 mov\\t%<w>0, -1
 mov\\t%<w>0, 1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "csel")]
 )

 ;; zero_extend version of above
@@ -2686,9 +2282,7 @@
 csinc\\t%w0, %w4, wzr, %M1
 mov\\t%w0, -1
 mov\\t%w0, 1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "csel")]
 )

 (define_insn "*cmov<mode>_insn"
@@ -2700,9 +2294,7 @@
 (match_operand:GPF 4 "register_operand" "w")))]
 "TARGET_FLOAT"
 "fcsel\\t%<s>0, %<s>3, %<s>4, %m1"
- [(set_attr "v8type" "fcsel")
- (set_attr "type" "fcsel")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "fcsel")]
 )

 (define_expand "mov<mode>cc"
@@ -2750,9 +2342,8 @@
 (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "csinc\\t%<w>0, %<w>1, %<w>1, %M2"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "csel")]
+)

 (define_insn "csinc3<mode>_insn"
 [(set (match_operand:GPI 0 "register_operand" "=r")
@@ -2764,9 +2355,7 @@
 (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
 ""
 "csinc\\t%<w>0, %<w>4, %<w>3, %M1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "csel")]
 )

 (define_insn "*csinv3<mode>_insn"
@@ -2778,9 +2367,8 @@
 (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
 ""
 "csinv\\t%<w>0, %<w>4, %<w>3, %M1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "csel")]
+)

 (define_insn "*csneg3<mode>_insn"
 [(set (match_operand:GPI 0 "register_operand" "=r")
@@ -2791,9 +2379,8 @@
 (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
 ""
 "csneg\\t%<w>0, %<w>4, %<w>3, %M1"
- [(set_attr "v8type" "csel")
- (set_attr "type" "csel")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "csel")]
+)

 ;; -------------------------------------------------------------------
 ;; Logical operations
@@ -2805,9 +2392,8 @@
 (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>")))]
 ""
 "<logical>\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "logic,logic_imm")
- (set_attr "type" "logic_reg,logic_imm")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logic_reg,logic_imm")]
+)

 ;; zero_extend version of above
 (define_insn "*<optab>si3_uxtw"
@@ -2817,9 +2403,8 @@
 (match_operand:SI 2 "aarch64_logical_operand" "r,K"))))]
 ""
 "<logical>\\t%w0, %w1, %w2"
- [(set_attr "v8type" "logic,logic_imm")
- (set_attr "type" "logic_reg,logic_imm")
- (set_attr "mode" "SI")])
+ [(set_attr "type" "logic_reg,logic_imm")]
+)

 (define_insn "*and<mode>3_compare0"
 [(set (reg:CC_NZ CC_REGNUM)
@@ -2831,9 +2416,7 @@
 (and:GPI (match_dup 1) (match_dup 2)))]
 ""
 "ands\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "logics,logics_imm")
- (set_attr "type" "logics_reg,logics_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "logics_reg,logics_imm")]
 )

 ;; zero_extend version of above
@@ -2847,9 +2430,7 @@
 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
 ""
 "ands\\t%w0, %w1, %w2"
- [(set_attr "v8type" "logics,logics_imm")
- (set_attr "type" "logics_reg,logics_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "logics_reg,logics_imm")]
 )

 (define_insn "*and_<SHIFT:optab><mode>3_compare0"
@@ -2864,9 +2445,7 @@
 (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))]
 ""
 "ands\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
- [(set_attr "v8type" "logics_shift")
- (set_attr "type" "logics_shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "logics_shift_imm")]
 )

 ;; zero_extend version of above
@@ -2883,9 +2462,7 @@
 (match_dup 3))))]
 ""
 "ands\\t%w0, %w3, %w1, <SHIFT:shift> %2"
- [(set_attr "v8type" "logics_shift")
- (set_attr "type" "logics_shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "logics_shift_imm")]
 )

 (define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3"
@@ -2896,9 +2473,8 @@
 (match_operand:GPI 3 "register_operand" "r")))]
 ""
 "<LOGICAL:logical>\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
- [(set_attr "v8type" "logic_shift")
- (set_attr "type" "logic_shift_imm")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logic_shift_imm")]
+)

 ;; zero_extend version of above
 (define_insn "*<LOGICAL:optab>_<SHIFT:optab>si3_uxtw"
@@ -2910,18 +2486,16 @@
 (match_operand:SI 3 "register_operand" "r"))))]
 ""
 "<LOGICAL:logical>\\t%w0, %w3, %w1, <SHIFT:shift> %2"
- [(set_attr "v8type" "logic_shift")
- (set_attr "type" "logic_shift_imm")
- (set_attr "mode" "SI")])
+ [(set_attr "type" "logic_shift_imm")]
+)

 (define_insn "one_cmpl<mode>2"
 [(set (match_operand:GPI 0 "register_operand" "=r")
 (not:GPI (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "mvn\\t%<w>0, %<w>1"
- [(set_attr "v8type" "logic")
- (set_attr "type" "logic_reg")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logic_reg")]
+)

 (define_insn "*one_cmpl_<optab><mode>2"
 [(set (match_operand:GPI 0 "register_operand" "=r")
@@ -2929,9 +2503,8 @@
 (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))))]
 ""
 "mvn\\t%<w>0, %<w>1, <shift> %2"
- [(set_attr "v8type" "logic_shift")
- (set_attr "type" "logic_shift_imm")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logic_shift_imm")]
+)

 (define_insn "*<LOGICAL:optab>_one_cmpl<mode>3"
 [(set (match_operand:GPI 0 "register_operand" "=r")
@@ -2940,9 +2513,8 @@
 (match_operand:GPI 2 "register_operand" "r")))]
 ""
 "<LOGICAL:nlogical>\\t%<w>0, %<w>2, %<w>1"
- [(set_attr "v8type" "logic")
- (set_attr "type" "logic_reg")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logic_reg")]
+)

 (define_insn "*and_one_cmpl<mode>3_compare0"
 [(set (reg:CC_NZ CC_REGNUM)
@@ -2955,9 +2527,8 @@
 (and:GPI (not:GPI (match_dup 1)) (match_dup 2)))]
 ""
 "bics\\t%<w>0, %<w>2, %<w>1"
- [(set_attr "v8type" "logics")
- (set_attr "type" "logics_reg")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logics_reg")]
+)

 ;; zero_extend version of above
 (define_insn "*and_one_cmplsi3_compare0_uxtw"
@@ -2971,9 +2542,8 @@
 (zero_extend:DI (and:SI (not:SI (match_dup 1)) (match_dup 2))))]
 ""
 "bics\\t%w0, %w2, %w1"
- [(set_attr "v8type" "logics")
- (set_attr "type" "logics_reg")
- (set_attr "mode" "SI")])
+ [(set_attr "type" "logics_reg")]
+)

 (define_insn "*<LOGICAL:optab>_one_cmpl_<SHIFT:optab><mode>3"
 [(set (match_operand:GPI 0 "register_operand" "=r")
@@ -2984,9 +2554,8 @@
 (match_operand:GPI 3 "register_operand" "r")))]
 ""
 "<LOGICAL:nlogical>\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
- [(set_attr "v8type" "logic_shift")
- (set_attr "type" "logics_shift_imm")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logics_shift_imm")]
+)

 (define_insn "*and_one_cmpl_<SHIFT:optab><mode>3_compare0"
 [(set (reg:CC_NZ CC_REGNUM)
@@ -3003,9 +2572,8 @@
 (match_dup 1) (match_dup 2))) (match_dup 3)))]
 ""
 "bics\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
- [(set_attr "v8type" "logics_shift")
- (set_attr "type" "logics_shift_imm")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logics_shift_imm")]
+)

 ;; zero_extend version of above
 (define_insn "*and_one_cmpl_<SHIFT:optab>si3_compare0_uxtw"
@@ -3023,18 +2591,16 @@
 (SHIFT:SI (match_dup 1) (match_dup 2))) (match_dup 3))))]
 ""
 "bics\\t%w0, %w3, %w1, <SHIFT:shift> %2"
- [(set_attr "v8type" "logics_shift")
- (set_attr "type" "logics_shift_imm")
- (set_attr "mode" "SI")])
+ [(set_attr "type" "logics_shift_imm")]
+)

 (define_insn "clz<mode>2"
 [(set (match_operand:GPI 0 "register_operand" "=r")
 (clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "clz\\t%<w>0, %<w>1"
- [(set_attr "v8type" "clz")
- (set_attr "type" "clz")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "clz")]
+)

 (define_expand "ffs<mode>2"
 [(match_operand:GPI 0 "register_operand")
@@ -3056,18 +2622,16 @@
 (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")]
 UNSPEC_CLS))]
 ""
 "cls\\t%<w>0, %<w>1"
- [(set_attr "v8type" "clz")
- (set_attr "type" "clz")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "clz")]
+)

 (define_insn "rbit<mode>2"
 [(set (match_operand:GPI 0 "register_operand" "=r")
 (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))]
 ""
 "rbit\\t%<w>0, %<w>1"
- [(set_attr "v8type" "rbit")
- (set_attr "type" "rbit")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "rbit")]
+)

 (define_expand "ctz<mode>2"
 [(match_operand:GPI 0 "register_operand")
@@ -3088,9 +2652,8 @@
 (const_int 0)))]
 ""
 "tst\\t%<w>0, %<w>1"
- [(set_attr "v8type" "logics")
- (set_attr "type" "logics_reg")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logics_reg")]
+)

 (define_insn "*and_<SHIFT:optab><mode>3nr_compare0"
 [(set (reg:CC_NZ CC_REGNUM)
@@ -3102,9 +2665,8 @@
 (const_int 0)))]
 ""
 "tst\\t%<w>2, %<w>0, <SHIFT:shift> %1"
- [(set_attr "v8type" "logics_shift")
- (set_attr "type" "logics_shift_imm")
- (set_attr "mode" "<MODE>")])
+ [(set_attr "type" "logics_shift_imm")]
+)

 ;; -------------------------------------------------------------------
 ;; Shifts
@@ -3205,11 +2767,7 @@
 ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
 lsl\t%<w>0, %<w>1, %<w>2"
 [(set_attr "simd" "yes,yes,no")
- (set_attr "simd_type" "simd_shift_imm,simd_shift,*")
- (set_attr "simd_mode" "<MODE>,<MODE>,*")
- (set_attr "v8type" "*,*,shift")
- (set_attr "type" "*,*,shift_reg")
- (set_attr "mode" "*,*,<MODE>")]
+ (set_attr "type" "neon_shift_imm<q>, neon_shift_reg<q>,shift_reg")]
 )

 ;; Logical right shift using SISD or Integer instruction
@@ -3224,11 +2782,7 @@
 #
 lsr\t%<w>0, %<w>1, %<w>2"
 [(set_attr "simd" "yes,yes,no")
- (set_attr "simd_type" "simd_shift_imm,simd_shift,*")
- (set_attr "simd_mode" "<MODE>,<MODE>,*")
- (set_attr "v8type" "*,*,shift")
- (set_attr "type" "*,*,shift_reg")
- (set_attr "mode" "*,*,<MODE>")]
+ (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,shift_reg")]
 )

 (define_split
@@ -3269,11 +2823,7 @@
 #
 asr\t%<w>0, %<w>1, %<w>2"
 [(set_attr "simd" "yes,yes,no")
- (set_attr "simd_type" "simd_shift_imm,simd_shift,*")
- (set_attr "simd_mode" "<MODE>,<MODE>,*")
- (set_attr "v8type" "*,*,shift")
- (set_attr "type" "*,*,shift_reg")
- (set_attr "mode" "*,*,<MODE>")]
+ (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,shift_reg")]
 )

 (define_split
@@ -3310,8 +2860,7 @@
 "TARGET_SIMD"
 "ushl\t%d0, %d1, %d2"
 [(set_attr "simd" "yes")
- (set_attr "simd_type" "simd_shift")
- (set_attr "simd_mode" "DI")]
+ (set_attr "type" "neon_shift_reg")]
 )

 (define_insn "*aarch64_ushl_2s"
@@ -3322,8 +2871,7 @@
 "TARGET_SIMD"
 "ushl\t%0.2s, %1.2s, %2.2s"
 [(set_attr "simd" "yes")
- (set_attr "simd_type" "simd_shift")
- (set_attr "simd_mode" "DI")]
+ (set_attr "type" "neon_shift_reg")]
 )

 (define_insn "*aarch64_sisd_sshl"
@@ -3334,8 +2882,7 @@
 "TARGET_SIMD"
 "sshl\t%d0, %d1, %d2"
 [(set_attr "simd" "yes")
- (set_attr "simd_type" "simd_shift")
- (set_attr "simd_mode" "DI")]
+ (set_attr "type" "neon_shift_reg")]
 )

 (define_insn "*aarch64_sshl_2s"
@@ -3346,8 +2893,7 @@
 "TARGET_SIMD"
 "sshl\t%0.2s, %1.2s, %2.2s"
 [(set_attr "simd" "yes")
- (set_attr "simd_type" "simd_shift")
- (set_attr "simd_mode" "DI")]
+ (set_attr "type" "neon_shift_reg")]
 )

 (define_insn "*aarch64_sisd_neg_qi"
@@ -3357,8 +2903,7 @@
 "TARGET_SIMD"
 "neg\t%d0, %d1"
 [(set_attr "simd" "yes")
- (set_attr "simd_type" "simd_negabs")
- (set_attr "simd_mode" "QI")]
+ (set_attr "type" "neon_neg")]
 )

 ;; Rotate right
@@ -3369,9 +2914,7 @@
 (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>")))]
 ""
 "ror\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "shift")
- (set_attr "type" "shift_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "shift_reg")]
 )

 ;; zero_extend version of above
@@ -3382,9 +2925,7 @@
 (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "rUss"))))]
 ""
 "<shift>\\t%w0, %w1, %w2"
- [(set_attr "v8type" "shift")
- (set_attr "type" "shift_reg")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "shift_reg")]
 )

 (define_insn "*ashl<mode>3_insn"
@@ -3393,9 +2934,7 @@
 (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "rUss")))]
 ""
 "lsl\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "v8type" "shift")
- (set_attr "type" "shift_reg")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "shift_reg")]
 )

 (define_insn "*<optab><mode>3_insn"
@@ -3407,9 +2946,7 @@
 operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
 return "<bfshift>\t%w0, %w1, %2, %3";
 }
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "bfm")]
 )

 (define_insn "*extr<mode>5_insn"
@@ -3421,9 +2958,7 @@
 "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode) &&
 (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))"
 "extr\\t%<w>0, %<w>1, %<w>2, %4"
- [(set_attr "v8type" "shift")
- (set_attr "type" "shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "shift_imm")]
 )

 ;; zero_extend version of the above
@@ -3437,9 +2972,7 @@
 "UINTVAL (operands[3]) < 32 &&
 (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
 "extr\\t%w0, %w1, %w2, %4"
- [(set_attr "v8type" "shift")
- (set_attr "type" "shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "shift_imm")]
 )

 (define_insn "*ror<mode>3_insn"
@@ -3451,9 +2984,7 @@
 operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
 return "ror\\t%<w>0, %<w>1, %3";
 }
- [(set_attr "v8type" "shift")
- (set_attr "type" "shift_imm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "shift_imm")]
 )

 ;; zero_extend version of the above
@@ -3467,9 +2998,7 @@
 operands[3] = GEN_INT (32 - UINTVAL (operands[2]));
 return "ror\\t%w0, %w1, %3";
 }
- [(set_attr "v8type" "shift")
- (set_attr "type" "shift_imm")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "shift_imm")]
 )

 (define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
@@ -3482,9 +3011,7 @@
 operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
 return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "bfm")]
 )

 (define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
@@ -3497,9 +3024,7 @@
 operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
 return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "bfm")]
 )

 (define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
@@ -3512,9 +3037,7 @@
 operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
 return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "bfm")]
 )

 ;; -------------------------------------------------------------------
@@ -3537,9 +3060,7 @@
 (match_operand 3 "const_int_operand" "n")))]
 ""
 "<su>bfx\\t%<w>0, %<w>1, %3, %2"
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "bfm")]
 )

 ;; Bitfield Insert (insv)
@@ -3582,9 +3103,7 @@
 || (UINTVAL (operands[2]) + UINTVAL (operands[1])
 > GET_MODE_BITSIZE (<MODE>mode)))"
 "bfi\\t%<w>0, %<w>3, %2, %1"
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "bfm")]
 )

 (define_insn "*extr_insv_lower_reg<mode>"
@@ -3598,9 +3117,7 @@
 || (UINTVAL (operands[3]) + UINTVAL (operands[1])
 > GET_MODE_BITSIZE (<MODE>mode)))"
 "bfxil\\t%<w>0, %<w>2, %3, %1"
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "bfm")]
 )

 (define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>"
@@ -3615,9 +3132,7 @@
 : GEN_INT (<GPI:sizen> - UINTVAL (operands[2]));
 return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
 }
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<GPI:MODE>")]
+ [(set_attr "type" "bfm")]
 )

 ;; XXX We should match (any_extend (ashift)) here, like (and (ashift)) below
@@ -3630,9 +3145,7 @@
 "exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1) >= 0
 && (INTVAL (operands[3]) & ((1 << INTVAL (operands[2])) - 1)) == 0"
 "ubfiz\\t%<w>0, %<w>1, %2, %P3"
- [(set_attr "v8type" "bfm")
- (set_attr "type" "bfm")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "bfm")]
 )

 (define_insn "bswap<mode>2"
@@ -3640,9 +3153,7 @@
 (bswap:GPI (match_operand:GPI 1 "register_operand" "r")))]
 ""
 "rev\\t%<w>0, %<w>1"
- [(set_attr "v8type" "rev")
- (set_attr "type" "rev")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "rev")]
 )

 (define_insn "bswaphi2"
@@ -3650,9 +3161,7 @@
 (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
 ""
 "rev16\\t%w0, %w1"
- [(set_attr "v8type" "rev")
- (set_attr "type" "rev")
- (set_attr "mode" "HI")]
+ [(set_attr "type" "rev")]
 )

 ;; zero_extend version of above
@@ -3661,9 +3170,7 @@
 (zero_extend:DI (bswap:SI (match_operand:SI 1 "register_operand" "r"))))]
 ""
 "rev\\t%w0, %w1"
- [(set_attr "v8type" "rev")
- (set_attr "type" "rev")
- (set_attr "mode" "SI")]
+ [(set_attr "type" "rev")]
 )

 ;; -------------------------------------------------------------------
@@ -3679,9 +3186,7 @@
 FRINT))]
 "TARGET_FLOAT"
 "frint<frint_suffix>\\t%<s>0, %<s>1"
- [(set_attr "v8type" "frint")
- (set_attr "type" "f_rint<s>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "f_rint<s>")]
 )

 ;; frcvt floating-point round to integer and convert standard patterns.
@@ -3692,10 +3197,7 @@
 FCVT)))]
 "TARGET_FLOAT"
 "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1"
- [(set_attr "v8type" "fcvtf2i")
- (set_attr "type" "f_cvtf2i")
- (set_attr "mode" "<GPF:MODE>")
- (set_attr "mode2" "<GPI:MODE>")]
+ [(set_attr "type" "f_cvtf2i")]
 )

 ;; fma - no throw
@@ -3707,9 +3209,7 @@
 (match_operand:GPF 3 "register_operand" "w")))]
 "TARGET_FLOAT"
 "fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
- [(set_attr "v8type" "fmadd")
- (set_attr "type" "fmac<s>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "fmac<s>")]
 )

 (define_insn "fnma<mode>4"
@@ -3719,9 +3219,7 @@
 (match_operand:GPF 3 "register_operand" "w")))]
 "TARGET_FLOAT"
 "fmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
- [(set_attr "v8type" "fmadd")
- (set_attr "type" "fmac<s>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "fmac<s>")]
 )

 (define_insn "fms<mode>4"
@@ -3731,9 +3229,7 @@
 (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
 "TARGET_FLOAT"
 "fnmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
- [(set_attr "v8type" "fmadd")
- (set_attr "type" "fmac<s>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "fmac<s>")]
 )

 (define_insn "fnms<mode>4"
@@ -3743,9 +3239,7 @@
 (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
 "TARGET_FLOAT"
 "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
- [(set_attr "v8type" "fmadd")
- (set_attr "type" "fmac<s>")
- (set_attr "mode" "<MODE>")]
+ [(set_attr "type" "fmac<s>")]
 )

 ;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
@@ -3756,9 +3250,7 @@ (match_operand:GPF 3 "register_operand" "w"))))] "!HONOR_SIGNED_ZEROS (<MODE>mode) && TARGET_FLOAT" "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" - [(set_attr "v8type" "fmadd") - (set_attr "type" "fmac<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "fmac<s>")] ) ;; ------------------------------------------------------------------- @@ -3770,10 +3262,7 @@ (float_extend:DF (match_operand:SF 1 "register_operand" "w")))] "TARGET_FLOAT" "fcvt\\t%d0, %s1" - [(set_attr "v8type" "fcvt") - (set_attr "type" "f_cvt") - (set_attr "mode" "DF") - (set_attr "mode2" "SF")] + [(set_attr "type" "f_cvt")] ) (define_insn "truncdfsf2" @@ -3781,10 +3270,7 @@ (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))] "TARGET_FLOAT" "fcvt\\t%s0, %d1" - [(set_attr "v8type" "fcvt") - (set_attr "type" "f_cvt") - (set_attr "mode" "SF") - (set_attr "mode2" "DF")] + [(set_attr "type" "f_cvt")] ) (define_insn "fix_trunc<GPF:mode><GPI:mode>2" @@ -3792,10 +3278,7 @@ (fix:GPI (match_operand:GPF 1 "register_operand" "w")))] "TARGET_FLOAT" "fcvtzs\\t%<GPI:w>0, %<GPF:s>1" - [(set_attr "v8type" "fcvtf2i") - (set_attr "type" "f_cvtf2i") - (set_attr "mode" "<GPF:MODE>") - (set_attr "mode2" "<GPI:MODE>")] + [(set_attr "type" "f_cvtf2i")] ) (define_insn "fixuns_trunc<GPF:mode><GPI:mode>2" @@ -3803,10 +3286,7 @@ (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))] "TARGET_FLOAT" "fcvtzu\\t%<GPI:w>0, %<GPF:s>1" - [(set_attr "v8type" "fcvtf2i") - (set_attr "type" "f_cvtf2i") - (set_attr "mode" "<GPF:MODE>") - (set_attr "mode2" "<GPI:MODE>")] + [(set_attr "type" "f_cvtf2i")] ) (define_insn "float<GPI:mode><GPF:mode>2" @@ -3814,10 +3294,7 @@ (float:GPF (match_operand:GPI 1 "register_operand" "r")))] "TARGET_FLOAT" "scvtf\\t%<GPF:s>0, %<GPI:w>1" - [(set_attr "v8type" "fcvti2f") - (set_attr "type" "f_cvti2f") - (set_attr "mode" "<GPF:MODE>") - (set_attr "mode2" "<GPI:MODE>")] + [(set_attr "type" "f_cvti2f")] ) (define_insn "floatuns<GPI:mode><GPF:mode>2" @@ -3825,10 +3302,7 @@ (unsigned_float:GPF (match_operand:GPI 1 "register_operand" "r")))] "TARGET_FLOAT" "ucvtf\\t%<GPF:s>0, %<GPI:w>1" - [(set_attr "v8type" "fcvt") - (set_attr "type" "f_cvt") - (set_attr "mode" "<GPF:MODE>") - (set_attr "mode2" "<GPI:MODE>")] + [(set_attr "type" "f_cvt")] ) ;; ------------------------------------------------------------------- @@ -3842,9 +3316,7 @@ (match_operand:GPF 2 "register_operand" "w")))] "TARGET_FLOAT" "fadd\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "fadd") - (set_attr "type" "fadd<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "fadd<s>")] ) (define_insn "sub<mode>3" @@ -3854,9 +3326,7 @@ (match_operand:GPF 2 "register_operand" "w")))] "TARGET_FLOAT" "fsub\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "fadd") - (set_attr "type" "fadd<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "fadd<s>")] ) (define_insn "mul<mode>3" @@ -3866,9 +3336,7 @@ (match_operand:GPF 2 "register_operand" "w")))] "TARGET_FLOAT" "fmul\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "fmul") - (set_attr "type" "fmul<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "fmul<s>")] ) (define_insn "*fnmul<mode>3" @@ -3878,9 +3346,7 @@ (match_operand:GPF 2 "register_operand" "w")))] "TARGET_FLOAT" "fnmul\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "fmul") - (set_attr "type" "fmul<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "fmul<s>")] ) (define_insn "div<mode>3" @@ -3890,9 +3356,7 @@ (match_operand:GPF 2 "register_operand" "w")))] "TARGET_FLOAT" "fdiv\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "fdiv") - 
(set_attr "type" "fdiv<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "fdiv<s>")] ) (define_insn "neg<mode>2" @@ -3900,9 +3364,7 @@ (neg:GPF (match_operand:GPF 1 "register_operand" "w")))] "TARGET_FLOAT" "fneg\\t%<s>0, %<s>1" - [(set_attr "v8type" "ffarith") - (set_attr "type" "ffarith<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "ffarith<s>")] ) (define_insn "sqrt<mode>2" @@ -3910,9 +3372,7 @@ (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))] "TARGET_FLOAT" "fsqrt\\t%<s>0, %<s>1" - [(set_attr "v8type" "fsqrt") - (set_attr "type" "fsqrt<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "fsqrt<s>")] ) (define_insn "abs<mode>2" @@ -3920,9 +3380,7 @@ (abs:GPF (match_operand:GPF 1 "register_operand" "w")))] "TARGET_FLOAT" "fabs\\t%<s>0, %<s>1" - [(set_attr "v8type" "ffarith") - (set_attr "type" "ffarith<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "ffarith<s>")] ) ;; Given that smax/smin do not specify the result when either input is NaN, @@ -3935,9 +3393,7 @@ (match_operand:GPF 2 "register_operand" "w")))] "TARGET_FLOAT" "fmaxnm\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "fminmax") - (set_attr "type" "f_minmax<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "f_minmax<s>")] ) (define_insn "smin<mode>3" @@ -3946,9 +3402,7 @@ (match_operand:GPF 2 "register_operand" "w")))] "TARGET_FLOAT" "fminnm\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "fminmax") - (set_attr "type" "f_minmax<s>") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "f_minmax<s>")] ) ;; ------------------------------------------------------------------- @@ -3974,15 +3428,13 @@ ;; The following secondary reload helpers patterns are invoked ;; after or during reload as we don't want these patterns to start ;; kicking in during the combiner. 
- + (define_insn "aarch64_movdi_<mode>low" [(set (match_operand:DI 0 "register_operand" "=r") (truncate:DI (match_operand:TX 1 "register_operand" "w")))] "reload_completed || reload_in_progress" "fmov\\t%x0, %d1" - [(set_attr "v8type" "fmovf2i") - (set_attr "type" "f_mrc") - (set_attr "mode" "DI") + [(set_attr "type" "f_mrc") (set_attr "length" "4") ]) @@ -3993,9 +3445,7 @@ (const_int 64))))] "reload_completed || reload_in_progress" "fmov\\t%x0, %1.d[1]" - [(set_attr "v8type" "fmovf2i") - (set_attr "type" "f_mrc") - (set_attr "mode" "DI") + [(set_attr "type" "f_mrc") (set_attr "length" "4") ]) @@ -4005,9 +3455,7 @@ (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] "reload_completed || reload_in_progress" "fmov\\t%0.d[1], %x1" - [(set_attr "v8type" "fmovi2f") - (set_attr "type" "f_mcr") - (set_attr "mode" "DI") + [(set_attr "type" "f_mcr") (set_attr "length" "4") ]) @@ -4016,9 +3464,7 @@ (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] "reload_completed || reload_in_progress" "fmov\\t%d0, %x1" - [(set_attr "v8type" "fmovi2f") - (set_attr "type" "f_mcr") - (set_attr "mode" "DI") + [(set_attr "type" "f_mcr") (set_attr "length" "4") ]) @@ -4028,9 +3474,7 @@ (truncate:DI (match_operand:TI 1 "register_operand" "w"))))] "reload_completed || reload_in_progress" "fmov\\t%d0, %d1" - [(set_attr "v8type" "fmovi2f") - (set_attr "type" "f_mcr") - (set_attr "mode" "DI") + [(set_attr "type" "f_mcr") (set_attr "length" "4") ]) @@ -4061,9 +3505,7 @@ (match_operand 2 "aarch64_valid_symref" "S")))] "" "add\\t%<w>0, %<w>1, :lo12:%a2" - [(set_attr "v8type" "alu") - (set_attr "type" "alu_reg") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "alu_reg")] ) (define_insn "ldr_got_small_<mode>" @@ -4074,9 +3516,7 @@ UNSPEC_GOTSMALLPIC))] "" "ldr\\t%<w>0, [%1, #:got_lo12:%a2]" - [(set_attr "v8type" "load1") - (set_attr "type" "load1") - (set_attr "mode" "<MODE>")] + [(set_attr "type" "load1")] ) (define_insn "ldr_got_small_sidi" @@ -4088,9 +3528,7 @@ UNSPEC_GOTSMALLPIC)))] "TARGET_ILP32" "ldr\\t%w0, [%1, #:got_lo12:%a2]" - [(set_attr "v8type" "load1") - (set_attr "type" "load1") - (set_attr "mode" "DI")] + [(set_attr "type" "load1")] ) (define_insn "ldr_got_tiny" @@ -4099,9 +3537,7 @@ UNSPEC_GOTTINYPIC))] "" "ldr\\t%0, %L1" - [(set_attr "v8type" "load1") - (set_attr "type" "load1") - (set_attr "mode" "DI")] + [(set_attr "type" "load1")] ) (define_insn "aarch64_load_tp_hard" @@ -4109,9 +3545,7 @@ (unspec:DI [(const_int 0)] UNSPEC_TLS))] "" "mrs\\t%0, tpidr_el0" - [(set_attr "v8type" "mrs") - (set_attr "type" "mrs") - (set_attr "mode" "DI")] + [(set_attr "type" "mrs")] ) ;; The TLS ABI specifically requires that the compiler does not schedule @@ -4135,8 +3569,7 @@ ] "" "adrp\\tx0, %A1\;add\\tx0, x0, %L1\;bl\\t%2\;nop" - [(set_attr "v8type" "call") - (set_attr "type" "call") + [(set_attr "type" "call") (set_attr "length" "16")]) (define_insn "tlsie_small" @@ -4145,9 +3578,7 @@ UNSPEC_GOTSMALLTLS))] "" "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" - [(set_attr "v8type" "load1") - (set_attr "type" "load1") - (set_attr "mode" "DI") + [(set_attr "type" "load1") (set_attr "length" "8")] ) @@ -4158,9 +3589,7 @@ UNSPEC_GOTSMALLTLS))] "" "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" - [(set_attr "v8type" "alu") - (set_attr "type" "alu_reg") - (set_attr "mode" "DI") + [(set_attr "type" "alu_reg") (set_attr "length" "8")] ) @@ -4172,8 +3601,7 @@ (clobber (match_scratch:DI 1 "=r"))] "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" - [(set_attr "v8type" "call") 
- (set_attr "type" "call") + [(set_attr "type" "call") (set_attr "length" "16")]) (define_insn "stack_tie" diff --git a/gcc/config/aarch64/large.md b/gcc/config/aarch64/large.md deleted file mode 100644 index 4316cc7dfaf..00000000000 --- a/gcc/config/aarch64/large.md +++ /dev/null @@ -1,312 +0,0 @@ -;; Copyright (C) 2012-2013 Free Software Foundation, Inc. -;; -;; Contributed by ARM Ltd. -;; -;; This file is part of GCC. -;; -;; GCC is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 3, or (at your option) -;; any later version. -;; -;; GCC is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;; General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with GCC; see the file COPYING3. If not see -;; <http://www.gnu.org/licenses/>. - -;; In the absence of any ARMv8-A implementations, two examples derived -;; from ARM's most recent ARMv7-A cores (Cortex-A7 and Cortex-A15) are -;; included by way of example. This is a temporary measure. - -;; Example pipeline description for an example 'large' core -;; implementing AArch64 - -;;------------------------------------------------------- -;; General Description -;;------------------------------------------------------- - -(define_automaton "large_cpu") - -;; The core is modelled as a triple issue pipeline that has -;; the following dispatch units. -;; 1. Two pipelines for simple integer operations: int1, int2 -;; 2. Two pipelines for SIMD and FP data-processing operations: fpsimd1, fpsimd2 -;; 3. One pipeline for branch operations: br -;; 4. One pipeline for integer multiply and divide operations: multdiv -;; 5. Two pipelines for load and store operations: ls1, ls2 -;; -;; We can issue into three pipelines per-cycle. -;; -;; We assume that where we have unit pairs xxx1 is always filled before xxx2. 
- -;;------------------------------------------------------- -;; CPU Units and Reservations -;;------------------------------------------------------- - -;; The three issue units -(define_cpu_unit "large_cpu_unit_i1, large_cpu_unit_i2, large_cpu_unit_i3" "large_cpu") - -(define_reservation "large_cpu_resv_i1" - "(large_cpu_unit_i1 | large_cpu_unit_i2 | large_cpu_unit_i3)") - -(define_reservation "large_cpu_resv_i2" - "((large_cpu_unit_i1 + large_cpu_unit_i2) | (large_cpu_unit_i2 + large_cpu_unit_i3))") - -(define_reservation "large_cpu_resv_i3" - "(large_cpu_unit_i1 + large_cpu_unit_i2 + large_cpu_unit_i3)") - -(final_presence_set "large_cpu_unit_i2" "large_cpu_unit_i1") -(final_presence_set "large_cpu_unit_i3" "large_cpu_unit_i2") - -;; The main dispatch units -(define_cpu_unit "large_cpu_unit_int1, large_cpu_unit_int2" "large_cpu") -(define_cpu_unit "large_cpu_unit_fpsimd1, large_cpu_unit_fpsimd2" "large_cpu") -(define_cpu_unit "large_cpu_unit_ls1, large_cpu_unit_ls2" "large_cpu") -(define_cpu_unit "large_cpu_unit_br" "large_cpu") -(define_cpu_unit "large_cpu_unit_multdiv" "large_cpu") - -(define_reservation "large_cpu_resv_ls" "(large_cpu_unit_ls1 | large_cpu_unit_ls2)") - -;; The extended load-store pipeline -(define_cpu_unit "large_cpu_unit_load, large_cpu_unit_store" "large_cpu") - -;; The extended ALU pipeline -(define_cpu_unit "large_cpu_unit_int1_alu, large_cpu_unit_int2_alu" "large_cpu") -(define_cpu_unit "large_cpu_unit_int1_shf, large_cpu_unit_int2_shf" "large_cpu") -(define_cpu_unit "large_cpu_unit_int1_sat, large_cpu_unit_int2_sat" "large_cpu") - - -;;------------------------------------------------------- -;; Simple ALU Instructions -;;------------------------------------------------------- - -;; Simple ALU operations without shift -(define_insn_reservation "large_cpu_alu" 2 - (and (eq_attr "tune" "large") (eq_attr "v8type" "adc,alu,alu_ext")) - "large_cpu_resv_i1, \ - (large_cpu_unit_int1, large_cpu_unit_int1_alu) |\ - (large_cpu_unit_int2, large_cpu_unit_int2_alu)") - -(define_insn_reservation "large_cpu_logic" 2 - (and (eq_attr "tune" "large") (eq_attr "v8type" "logic,logic_imm")) - "large_cpu_resv_i1, \ - (large_cpu_unit_int1, large_cpu_unit_int1_alu) |\ - (large_cpu_unit_int2, large_cpu_unit_int2_alu)") - -(define_insn_reservation "large_cpu_shift" 2 - (and (eq_attr "tune" "large") (eq_attr "v8type" "shift,shift_imm")) - "large_cpu_resv_i1, \ - (large_cpu_unit_int1, large_cpu_unit_int1_shf) |\ - (large_cpu_unit_int2, large_cpu_unit_int2_shf)") - -;; Simple ALU operations with immediate shift -(define_insn_reservation "large_cpu_alu_shift" 3 - (and (eq_attr "tune" "large") (eq_attr "v8type" "alu_shift")) - "large_cpu_resv_i1, \ - (large_cpu_unit_int1, - large_cpu_unit_int1 + large_cpu_unit_int1_shf, large_cpu_unit_int1_alu) | \ - (large_cpu_unit_int2, - large_cpu_unit_int2 + large_cpu_unit_int2_shf, large_cpu_unit_int2_alu)") - -(define_insn_reservation "large_cpu_logic_shift" 3 - (and (eq_attr "tune" "large") (eq_attr "v8type" "logic_shift")) - "large_cpu_resv_i1, \ - (large_cpu_unit_int1, large_cpu_unit_int1_alu) |\ - (large_cpu_unit_int2, large_cpu_unit_int2_alu)") - - -;;------------------------------------------------------- -;; Multiplication/Division -;;------------------------------------------------------- - -;; Simple multiplication -(define_insn_reservation "large_cpu_mult_single" 3 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "mult,madd") (eq_attr "mode" "SI"))) - "large_cpu_resv_i1, large_cpu_unit_multdiv") - -(define_insn_reservation 
"large_cpu_mult_double" 4 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "mult,madd") (eq_attr "mode" "DI"))) - "large_cpu_resv_i1, large_cpu_unit_multdiv") - -;; 64-bit multiplication -(define_insn_reservation "large_cpu_mull" 4 - (and (eq_attr "tune" "large") (eq_attr "v8type" "mull,mulh,maddl")) - "large_cpu_resv_i1, large_cpu_unit_multdiv * 2") - -;; Division -(define_insn_reservation "large_cpu_udiv_single" 9 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "udiv") (eq_attr "mode" "SI"))) - "large_cpu_resv_i1, large_cpu_unit_multdiv") - -(define_insn_reservation "large_cpu_udiv_double" 18 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "udiv") (eq_attr "mode" "DI"))) - "large_cpu_resv_i1, large_cpu_unit_multdiv") - -(define_insn_reservation "large_cpu_sdiv_single" 10 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "sdiv") (eq_attr "mode" "SI"))) - "large_cpu_resv_i1, large_cpu_unit_multdiv") - -(define_insn_reservation "large_cpu_sdiv_double" 20 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "sdiv") (eq_attr "mode" "DI"))) - "large_cpu_resv_i1, large_cpu_unit_multdiv") - - -;;------------------------------------------------------- -;; Branches -;;------------------------------------------------------- - -;; Branches take one issue slot. -;; No latency as there is no result -(define_insn_reservation "large_cpu_branch" 0 - (and (eq_attr "tune" "large") (eq_attr "v8type" "branch")) - "large_cpu_resv_i1, large_cpu_unit_br") - - -;; Calls take up all issue slots, and form a block in the -;; pipeline. The result however is available the next cycle. -;; Addition of new units requires this to be updated. -(define_insn_reservation "large_cpu_call" 1 - (and (eq_attr "tune" "large") (eq_attr "v8type" "call")) - "large_cpu_resv_i3 | large_cpu_resv_i2, \ - large_cpu_unit_int1 + large_cpu_unit_int2 + large_cpu_unit_br + \ - large_cpu_unit_multdiv + large_cpu_unit_fpsimd1 + large_cpu_unit_fpsimd2 + \ - large_cpu_unit_ls1 + large_cpu_unit_ls2,\ - large_cpu_unit_int1_alu + large_cpu_unit_int1_shf + large_cpu_unit_int1_sat + \ - large_cpu_unit_int2_alu + large_cpu_unit_int2_shf + \ - large_cpu_unit_int2_sat + large_cpu_unit_load + large_cpu_unit_store") - - -;;------------------------------------------------------- -;; Load/Store Instructions -;;------------------------------------------------------- - -;; Loads of up to two words. -(define_insn_reservation "large_cpu_load1" 4 - (and (eq_attr "tune" "large") (eq_attr "v8type" "load_acq,load1,load2")) - "large_cpu_resv_i1, large_cpu_resv_ls, large_cpu_unit_load, nothing") - -;; Stores of up to two words. -(define_insn_reservation "large_cpu_store1" 0 - (and (eq_attr "tune" "large") (eq_attr "v8type" "store_rel,store1,store2")) - "large_cpu_resv_i1, large_cpu_resv_ls, large_cpu_unit_store") - - -;;------------------------------------------------------- -;; Floating-point arithmetic. 
-;;------------------------------------------------------- - -(define_insn_reservation "large_cpu_fpalu" 4 - (and (eq_attr "tune" "large") - (eq_attr "v8type" "ffarith,fadd,fccmp,fcvt,fcmp")) - "large_cpu_resv_i1 + large_cpu_unit_fpsimd1") - -(define_insn_reservation "large_cpu_fconst" 3 - (and (eq_attr "tune" "large") - (eq_attr "v8type" "fconst")) - "large_cpu_resv_i1 + large_cpu_unit_fpsimd1") - -(define_insn_reservation "large_cpu_fpmuls" 4 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fmul,fmadd") (eq_attr "mode" "SF"))) - "large_cpu_resv_i1 + large_cpu_unit_fpsimd1") - -(define_insn_reservation "large_cpu_fpmuld" 7 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fmul,fmadd") (eq_attr "mode" "DF"))) - "large_cpu_resv_i1 + large_cpu_unit_fpsimd1, large_cpu_unit_fpsimd1 * 2,\ - large_cpu_resv_i1 + large_cpu_unit_fpsimd1") - - -;;------------------------------------------------------- -;; Floating-point Division -;;------------------------------------------------------- - -;; Single-precision divide takes 14 cycles to complete, and this -;; includes the time taken for the special instruction used to collect the -;; result to travel down the multiply pipeline. - -(define_insn_reservation "large_cpu_fdivs" 14 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "SF"))) - "large_cpu_resv_i1, large_cpu_unit_fpsimd1 * 13") - -(define_insn_reservation "large_cpu_fdivd" 29 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "DF"))) - "large_cpu_resv_i1, large_cpu_unit_fpsimd1 * 28") - - - -;;------------------------------------------------------- -;; Floating-point Transfers -;;------------------------------------------------------- - -(define_insn_reservation "large_cpu_i2f" 4 - (and (eq_attr "tune" "large") - (eq_attr "v8type" "fmovi2f")) - "large_cpu_resv_i1") - -(define_insn_reservation "large_cpu_f2i" 2 - (and (eq_attr "tune" "large") - (eq_attr "v8type" "fmovf2i")) - "large_cpu_resv_i1") - - -;;------------------------------------------------------- -;; Floating-point Load/Store -;;------------------------------------------------------- - -(define_insn_reservation "large_cpu_floads" 4 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fpsimd_load,fpsimd_load2") (eq_attr "mode" "SF"))) - "large_cpu_resv_i1") - -(define_insn_reservation "large_cpu_floadd" 5 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fpsimd_load,fpsimd_load2") (eq_attr "mode" "DF"))) - "large_cpu_resv_i1 + large_cpu_unit_br, large_cpu_resv_i1") - -(define_insn_reservation "large_cpu_fstores" 0 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fpsimd_store,fpsimd_store2") (eq_attr "mode" "SF"))) - "large_cpu_resv_i1") - -(define_insn_reservation "large_cpu_fstored" 0 - (and (eq_attr "tune" "large") - (and (eq_attr "v8type" "fpsimd_store,fpsimd_store2") (eq_attr "mode" "DF"))) - "large_cpu_resv_i1 + large_cpu_unit_br, large_cpu_resv_i1") - - -;;------------------------------------------------------- -;; Bypasses -;;------------------------------------------------------- - -(define_bypass 1 "large_cpu_alu, large_cpu_logic, large_cpu_shift" - "large_cpu_alu, large_cpu_alu_shift, large_cpu_logic, large_cpu_logic_shift, large_cpu_shift") - -(define_bypass 2 "large_cpu_alu_shift, large_cpu_logic_shift" - "large_cpu_alu, large_cpu_alu_shift, large_cpu_logic, large_cpu_logic_shift, large_cpu_shift") - -(define_bypass 1 "large_cpu_alu, large_cpu_logic, large_cpu_shift" "large_cpu_load1") - -(define_bypass 2 
"large_cpu_alu_shift, large_cpu_logic_shift" "large_cpu_load1") - -(define_bypass 2 "large_cpu_floads" - "large_cpu_fpalu, large_cpu_fpmuld,\ - large_cpu_fdivs, large_cpu_fdivd,\ - large_cpu_f2i") - -(define_bypass 3 "large_cpu_floadd" - "large_cpu_fpalu, large_cpu_fpmuld,\ - large_cpu_fdivs, large_cpu_fdivd,\ - large_cpu_f2i") diff --git a/gcc/config/aarch64/small.md b/gcc/config/aarch64/small.md deleted file mode 100644 index a19083ccff2..00000000000 --- a/gcc/config/aarch64/small.md +++ /dev/null @@ -1,287 +0,0 @@ -;; Copyright (C) 2012-2013 Free Software Foundation, Inc. -;; -;; Contributed by ARM Ltd. -;; -;; This file is part of GCC. -;; -;; GCC is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 3, or (at your option) -;; any later version. -;; -;; GCC is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;; General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with GCC; see the file COPYING3. If not see -;; <http://www.gnu.org/licenses/>. - -;; In the absence of any ARMv8-A implementations, two examples derived -;; from ARM's most recent ARMv7-A cores (Cortex-A7 and Cortex-A15) are -;; included by way of example. This is a temporary measure. - -;; Example pipeline description for an example 'small' core -;; implementing AArch64 - -;;------------------------------------------------------- -;; General Description -;;------------------------------------------------------- - -(define_automaton "small_cpu") - -;; The core is modelled as a single issue pipeline with the following -;; dispatch units. -;; 1. One pipeline for simple intructions. -;; 2. One pipeline for branch intructions. -;; -;; There are five pipeline stages. -;; The decode/issue stages operate the same for all instructions. -;; Instructions always advance one stage per cycle in order. -;; Only branch instructions may dual-issue with other instructions, except -;; when those instructions take multiple cycles to issue. - - -;;------------------------------------------------------- -;; CPU Units and Reservations -;;------------------------------------------------------- - -(define_cpu_unit "small_cpu_unit_i" "small_cpu") -(define_cpu_unit "small_cpu_unit_br" "small_cpu") - -;; Pseudo-unit for blocking the multiply pipeline when a double-precision -;; multiply is in progress. -(define_cpu_unit "small_cpu_unit_fpmul_pipe" "small_cpu") - -;; The floating-point add pipeline, used to model the usage -;; of the add pipeline by fp alu instructions. -(define_cpu_unit "small_cpu_unit_fpadd_pipe" "small_cpu") - -;; Floating-point division pipeline (long latency, out-of-order completion). 
-(define_cpu_unit "small_cpu_unit_fpdiv" "small_cpu") - - -;;------------------------------------------------------- -;; Simple ALU Instructions -;;------------------------------------------------------- - -;; Simple ALU operations without shift -(define_insn_reservation "small_cpu_alu" 2 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "adc,alu,alu_ext")) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_logic" 2 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "logic,logic_imm")) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_shift" 2 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "shift,shift_imm")) - "small_cpu_unit_i") - -;; Simple ALU operations with immediate shift -(define_insn_reservation "small_cpu_alu_shift" 2 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "alu_shift")) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_logic_shift" 2 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "logic_shift")) - "small_cpu_unit_i") - - -;;------------------------------------------------------- -;; Multiplication/Division -;;------------------------------------------------------- - -;; Simple multiplication -(define_insn_reservation "small_cpu_mult_single" 2 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "mult,madd") (eq_attr "mode" "SI"))) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_mult_double" 3 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "mult,madd") (eq_attr "mode" "DI"))) - "small_cpu_unit_i") - -;; 64-bit multiplication -(define_insn_reservation "small_cpu_mull" 3 - (and (eq_attr "tune" "small") (eq_attr "v8type" "mull,mulh,maddl")) - "small_cpu_unit_i * 2") - -;; Division -(define_insn_reservation "small_cpu_udiv_single" 5 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "udiv") (eq_attr "mode" "SI"))) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_udiv_double" 10 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "udiv") (eq_attr "mode" "DI"))) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_sdiv_single" 6 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "sdiv") (eq_attr "mode" "SI"))) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_sdiv_double" 12 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "sdiv") (eq_attr "mode" "DI"))) - "small_cpu_unit_i") - - -;;------------------------------------------------------- -;; Load/Store Instructions -;;------------------------------------------------------- - -(define_insn_reservation "small_cpu_load1" 2 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "load_acq,load1")) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_store1" 0 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "store_rel,store1")) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_load2" 3 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "load2")) - "small_cpu_unit_i + small_cpu_unit_br, small_cpu_unit_i") - -(define_insn_reservation "small_cpu_store2" 0 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "store2")) - "small_cpu_unit_i + small_cpu_unit_br, small_cpu_unit_i") - - -;;------------------------------------------------------- -;; Branches -;;------------------------------------------------------- - -;; Direct branches are the only instructions that can dual-issue. -;; The latency here represents when the branch actually takes place. 
- -(define_insn_reservation "small_cpu_unit_br" 3 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "branch,call")) - "small_cpu_unit_br") - - -;;------------------------------------------------------- -;; Floating-point arithmetic. -;;------------------------------------------------------- - -(define_insn_reservation "small_cpu_fpalu" 4 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "ffarith,fadd,fccmp,fcvt,fcmp")) - "small_cpu_unit_i + small_cpu_unit_fpadd_pipe") - -(define_insn_reservation "small_cpu_fconst" 3 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "fconst")) - "small_cpu_unit_i + small_cpu_unit_fpadd_pipe") - -(define_insn_reservation "small_cpu_fpmuls" 4 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fmul") (eq_attr "mode" "SF"))) - "small_cpu_unit_i + small_cpu_unit_fpmul_pipe") - -(define_insn_reservation "small_cpu_fpmuld" 7 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fmul") (eq_attr "mode" "DF"))) - "small_cpu_unit_i + small_cpu_unit_fpmul_pipe, small_cpu_unit_fpmul_pipe * 2,\ - small_cpu_unit_i + small_cpu_unit_fpmul_pipe") - - -;;------------------------------------------------------- -;; Floating-point Division -;;------------------------------------------------------- - -;; Single-precision divide takes 14 cycles to complete, and this -;; includes the time taken for the special instruction used to collect the -;; result to travel down the multiply pipeline. - -(define_insn_reservation "small_cpu_fdivs" 14 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "SF"))) - "small_cpu_unit_i, small_cpu_unit_fpdiv * 13") - -(define_insn_reservation "small_cpu_fdivd" 29 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "DF"))) - "small_cpu_unit_i, small_cpu_unit_fpdiv * 28") - - -;;------------------------------------------------------- -;; Floating-point Transfers -;;------------------------------------------------------- - -(define_insn_reservation "small_cpu_i2f" 4 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "fmovi2f")) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_f2i" 2 - (and (eq_attr "tune" "small") - (eq_attr "v8type" "fmovf2i")) - "small_cpu_unit_i") - - -;;------------------------------------------------------- -;; Floating-point Load/Store -;;------------------------------------------------------- - -(define_insn_reservation "small_cpu_floads" 4 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fpsimd_load") (eq_attr "mode" "SF"))) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_floadd" 5 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fpsimd_load") (eq_attr "mode" "DF"))) - "small_cpu_unit_i + small_cpu_unit_br, small_cpu_unit_i") - -(define_insn_reservation "small_cpu_fstores" 0 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fpsimd_store") (eq_attr "mode" "SF"))) - "small_cpu_unit_i") - -(define_insn_reservation "small_cpu_fstored" 0 - (and (eq_attr "tune" "small") - (and (eq_attr "v8type" "fpsimd_store") (eq_attr "mode" "DF"))) - "small_cpu_unit_i + small_cpu_unit_br, small_cpu_unit_i") - - -;;------------------------------------------------------- -;; Bypasses -;;------------------------------------------------------- - -;; Forwarding path for unshifted operands. 
- -(define_bypass 1 "small_cpu_alu, small_cpu_alu_shift" - "small_cpu_alu, small_cpu_alu_shift, small_cpu_logic, small_cpu_logic_shift, small_cpu_shift") - -(define_bypass 1 "small_cpu_logic, small_cpu_logic_shift" - "small_cpu_alu, small_cpu_alu_shift, small_cpu_logic, small_cpu_logic_shift, small_cpu_shift") - -(define_bypass 1 "small_cpu_shift" - "small_cpu_alu, small_cpu_alu_shift, small_cpu_logic, small_cpu_logic_shift, small_cpu_shift") - -;; Load-to-use for floating-point values has a penalty of one cycle. - -(define_bypass 2 "small_cpu_floads" - "small_cpu_fpalu, small_cpu_fpmuld,\ - small_cpu_fdivs, small_cpu_fdivd,\ - small_cpu_f2i") - -(define_bypass 3 "small_cpu_floadd" - "small_cpu_fpalu, small_cpu_fpmuld,\ - small_cpu_fdivs, small_cpu_fdivd,\ - small_cpu_f2i") diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index e39fc7731e6..c55835e7fe1 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -25,6 +25,9 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" #include "regs.h" #include "hard-reg-set.h" #include "insn-config.h" @@ -49,7 +52,9 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "splay-tree.h" #include "gimple.h" +#include "gimplify.h" #include "gimple-ssa.h" +#include "stringpool.h" #include "tree-ssanames.h" #include "tree-stdarg.h" #include "tm-constrs.h" @@ -4830,7 +4835,8 @@ alpha_gp_save_rtx (void) label. Emit the sequence properly on the edge. We are only invoked from dw2_build_landing_pads and finish_eh_generation will call commit_edge_insertions thanks to a kludge. */ - insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); cfun->machine->gp_save_rtx = m; } @@ -5859,7 +5865,7 @@ va_list_skip_additions (tree lhs) if (!CONVERT_EXPR_CODE_P (code) && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST - || !host_integerp (gimple_assign_rhs2 (stmt), 1))) + || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt)))) return stmt; lhs = gimple_assign_rhs1 (stmt); @@ -5985,10 +5991,10 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) else goto escapes; - if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0)) + if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt))) goto escapes; - sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0); + sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt)); if (code2 == MINUS_EXPR) sub = -sub; if (sub < -48 || sub > -32) diff --git a/gcc/config/alpha/predicates.md b/gcc/config/alpha/predicates.md index a63d1254a6f..8a2166c1e03 100644 --- a/gcc/config/alpha/predicates.md +++ b/gcc/config/alpha/predicates.md @@ -357,7 +357,7 @@ && !SYMBOL_REF_TLS_MODEL (op)) { if (SYMBOL_REF_DECL (op)) - max_ofs = tree_low_cst (DECL_SIZE_UNIT (SYMBOL_REF_DECL (op)), 1); + max_ofs = tree_to_uhwi (DECL_SIZE_UNIT (SYMBOL_REF_DECL (op))); } else return false; diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 7b1853b7f9b..77932ce567c 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -34,6 +34,10 @@ along with GCC; see the file COPYING3. 
If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "calls.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" @@ -632,6 +636,44 @@ make_pass_arc_ifcvt (gcc::context *ctxt) return new pass_arc_ifcvt (ctxt); } +static unsigned arc_predicate_delay_insns (void); + +namespace { + +const pass_data pass_data_arc_predicate_delay_insns = +{ + RTL_PASS, + "arc_predicate_delay_insns", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_IFCVT2, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish /* todo_flags_finish */ +}; + +class pass_arc_predicate_delay_insns : public rtl_opt_pass +{ +public: + pass_arc_predicate_delay_insns(gcc::context *ctxt) + : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return arc_predicate_delay_insns (); } +}; + +} // anon namespace + +rtl_opt_pass * +make_pass_arc_predicate_delay_insns (gcc::context *ctxt) +{ + return new pass_arc_predicate_delay_insns (ctxt); +} + /* Called by OVERRIDE_OPTIONS to initialize various things. */ void @@ -752,6 +794,16 @@ arc_init (void) register_pass (&arc_ifcvt4_info); register_pass (&arc_ifcvt5_info); } + + if (flag_delayed_branch) + { + opt_pass *pass_arc_predicate_delay_insns + = make_pass_arc_predicate_delay_insns (g); + struct register_pass_info arc_predicate_delay_info + = { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER }; + + register_pass (&arc_predicate_delay_info); + } } /* Check ARC options, generate derived target attributes. */ @@ -8297,6 +8349,74 @@ arc_ifcvt (void) return 0; } +/* Find annulled delay insns and convert them to use the appropriate predicate. + This allows branch shortening to size up these insns properly. */ + +static unsigned +arc_predicate_delay_insns (void) +{ + for (rtx insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat, jump, dlay, src, cond, *patp; + int reverse; + + if (!NONJUMP_INSN_P (insn) + || GET_CODE (pat = PATTERN (insn)) != SEQUENCE) + continue; + jump = XVECEXP (pat, 0, 0); + dlay = XVECEXP (pat, 0, 1); + if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump)) + continue; + /* If the branch insn does the annulling, leave the delay insn alone. */ + if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay)) + continue; + /* ??? Could also leave DLAY un-conditionalized if its target is dead + on the other path. */ + gcc_assert (GET_CODE (PATTERN (jump)) == SET); + gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx); + src = SET_SRC (PATTERN (jump)); + gcc_assert (GET_CODE (src) == IF_THEN_ELSE); + cond = XEXP (src, 0); + if (XEXP (src, 2) == pc_rtx) + reverse = 0; + else if (XEXP (src, 1) == pc_rtx) + reverse = 1; + else + gcc_unreachable (); + if (!INSN_FROM_TARGET_P (dlay) != reverse) + { + enum machine_mode ccm = GET_MODE (XEXP (cond, 0)); + enum rtx_code code = reverse_condition (GET_CODE (cond)); + if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode) + code = reverse_condition_maybe_unordered (GET_CODE (cond)); + + cond = gen_rtx_fmt_ee (code, GET_MODE (cond), + copy_rtx (XEXP (cond, 0)), + copy_rtx (XEXP (cond, 1))); + } + else + cond = copy_rtx (cond); + patp = &PATTERN (dlay); + pat = *patp; + /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know + what to do with COND_EXEC. 
*/ + if (RTX_FRAME_RELATED_P (dlay)) + { + /* As this is the delay slot insn of an anulled branch, + dwarf2out.c:scan_trace understands the anulling semantics + without the COND_EXEC. */ + rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat, + REG_NOTES (dlay)); + validate_change (dlay, &REG_NOTES (dlay), note, 1); + } + pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat); + validate_change (dlay, patp, pat, 1); + if (!apply_change_group ()) + gcc_unreachable (); + } + return 0; +} + /* For ARC600: If a write to a core reg >=32 appears in a delay slot (other than of a forward brcc), it creates a hazard when there is a read of the same register at the branch target. We can't know what is at the diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index baf347876be..64b4162a877 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -4789,8 +4789,7 @@ { /* ??? Can do better for when a scratch register is known. But that would require extra testing. */ - arc_clear_unalign (); - return ".p2align 2\;push_s r0\;add r0,pcl,%4-.+2\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-.+2\;sr r0,[3]; LP_END\;pop_s r0"; + return "push_s r0\;add r0,pcl,%4-(.&-4)\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-(.&-4)\;sr r0,[3]; LP_END\;pop_s r0"; } /* Check if the loop end is in range to be set by the lp instruction. */ size = INTVAL (operands[3]) < 2 ? 0 : 2048; diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index 088013bbdb7..795045377dd 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -338,7 +338,7 @@ (and (match_code "REG") (match_test "TARGET_Rcq && !arc_ccfsm_cond_exec_p () - && ((((REGNO (op) & 7) ^ 4) - 4) & 15) == REGNO (op)"))) + && IN_RANGE (REGNO (op) ^ 4, 4, 11)"))) ; If we need a reload, we generally want to steer reload to use three-address ; alternatives in preference of two-address alternatives, unless the diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 841f544e83d..c3652a72c81 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -31,6 +31,7 @@ extern int arm_no_early_alu_shift_dep (rtx, rtx); extern int arm_no_early_alu_shift_value_dep (rtx, rtx); extern int arm_no_early_mul_dep (rtx, rtx); extern int arm_no_early_store_addr_dep (rtx, rtx); +extern bool arm_rtx_shift_left_p (rtx); /* RTX cost table definitions. These are used when tuning for speed rather than for size and should reflect the _additional_ cost over the cost diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c index 201e581a4a6..a46e6751a7b 100644 --- a/gcc/config/arm/aarch-common.c +++ b/gcc/config/arm/aarch-common.c @@ -40,7 +40,7 @@ typedef struct /* Return TRUE if X is either an arithmetic shift left, or is a multiplication by a power of two. */ -static bool +bool arm_rtx_shift_left_p (rtx x) { enum rtx_code code = GET_CODE (x); diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h new file mode 100644 index 00000000000..d3e7dd2d799 --- /dev/null +++ b/gcc/config/arm/aarch-cost-tables.h @@ -0,0 +1,227 @@ +/* RTX cost tables shared between arm and aarch64. + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_AARCH_COST_TABLES_H +#define GCC_AARCH_COST_TABLES_H + +const struct cpu_cost_table generic_extra_costs = +{ + /* ALU */ + { + 0, /* Arith. */ + 0, /* Logical. */ + 0, /* Shift. */ + COSTS_N_INSNS (1), /* Shift_reg. */ + 0, /* Arith_shift. */ + COSTS_N_INSNS (1), /* Arith_shift_reg. */ + 0, /* Log_shift. */ + COSTS_N_INSNS (1), /* Log_shift_reg. */ + 0, /* Extend. */ + COSTS_N_INSNS (1), /* Extend_arith. */ + 0, /* Bfi. */ + 0, /* Bfx. */ + 0, /* Clz. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (2), /* Simple. */ + COSTS_N_INSNS (1), /* Flag_setting. */ + COSTS_N_INSNS (2), /* Extend. */ + COSTS_N_INSNS (3), /* Add. */ + COSTS_N_INSNS (3), /* Extend_add. */ + COSTS_N_INSNS (8) /* Idiv. */ + }, + /* MULT DImode */ + { + 0, /* Simple (N/A). */ + 0, /* Flag_setting (N/A). */ + COSTS_N_INSNS (2), /* Extend. */ + 0, /* Add (N/A). */ + COSTS_N_INSNS (3), /* Extend_add. */ + 0 /* Idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* Load. */ + COSTS_N_INSNS (2), /* Load_sign_extend. */ + COSTS_N_INSNS (3), /* Ldrd. */ + COSTS_N_INSNS (2), /* Ldm_1st. */ + 1, /* Ldm_regs_per_insn_1st. */ + 1, /* Ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* Loadf. */ + COSTS_N_INSNS (3), /* Loadd. */ + COSTS_N_INSNS (1), /* Load_unaligned. */ + COSTS_N_INSNS (2), /* Store. */ + COSTS_N_INSNS (3), /* Strd. */ + COSTS_N_INSNS (2), /* Stm_1st. */ + 1, /* Stm_regs_per_insn_1st. */ + 1, /* Stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* Storef. */ + COSTS_N_INSNS (3), /* Stored. */ + COSTS_N_INSNS (1) /* Store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (7), /* Div. */ + COSTS_N_INSNS (2), /* Mult. */ + COSTS_N_INSNS (3), /* Mult_addsub. */ + COSTS_N_INSNS (3), /* Fma. */ + COSTS_N_INSNS (1), /* Addsub. */ + 0, /* Fpconst. */ + 0, /* Neg. */ + 0, /* Compare. */ + 0, /* Widen. */ + 0, /* Narrow. */ + 0, /* Toint. */ + 0, /* Fromint. */ + 0 /* Roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (15), /* Div. */ + COSTS_N_INSNS (5), /* Mult. */ + COSTS_N_INSNS (7), /* Mult_addsub. */ + COSTS_N_INSNS (7), /* Fma. */ + COSTS_N_INSNS (3), /* Addsub. */ + 0, /* Fpconst. */ + 0, /* Neg. */ + 0, /* Compare. */ + 0, /* Widen. */ + 0, /* Narrow. */ + 0, /* Toint. */ + 0, /* Fromint. */ + 0 /* Roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* Alu. */ + } +}; + +const struct cpu_cost_table cortexa53_extra_costs = +{ + /* ALU */ + { + 0, /* Arith. */ + 0, /* Logical. */ + COSTS_N_INSNS (1), /* Shift. */ + COSTS_N_INSNS (2), /* Shift_reg. */ + COSTS_N_INSNS (1), /* Arith_shift. */ + COSTS_N_INSNS (2), /* Arith_shift_reg. */ + COSTS_N_INSNS (1), /* Log_shift. */ + COSTS_N_INSNS (2), /* Log_shift_reg. */ + 0, /* Extend. */ + COSTS_N_INSNS (1), /* Extend_arith. */ + COSTS_N_INSNS (1), /* Bfi. */ + COSTS_N_INSNS (1), /* Bfx. */ + 0, /* Clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (1), /* Simple. */ + COSTS_N_INSNS (2), /* Flag_setting. */ + COSTS_N_INSNS (1), /* Extend. */ + COSTS_N_INSNS (1), /* Add. 
*/ + COSTS_N_INSNS (1), /* Extend_add. */ + COSTS_N_INSNS (7) /* Idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (2), /* Simple. */ + 0, /* Flag_setting (N/A). */ + COSTS_N_INSNS (2), /* Extend. */ + COSTS_N_INSNS (2), /* Add. */ + COSTS_N_INSNS (2), /* Extend_add. */ + COSTS_N_INSNS (15) /* Idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (1), /* Load. */ + COSTS_N_INSNS (1), /* Load_sign_extend. */ + COSTS_N_INSNS (1), /* Ldrd. */ + COSTS_N_INSNS (1), /* Ldm_1st. */ + 1, /* Ldm_regs_per_insn_1st. */ + 2, /* Ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* Loadf. */ + COSTS_N_INSNS (1), /* Loadd. */ + COSTS_N_INSNS (1), /* Load_unaligned. */ + 0, /* Store. */ + 0, /* Strd. */ + 0, /* Stm_1st. */ + 1, /* Stm_regs_per_insn_1st. */ + 2, /* Stm_regs_per_insn_subsequent. */ + 0, /* Storef. */ + 0, /* Stored. */ + COSTS_N_INSNS (1) /* Store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (15), /* Div. */ + COSTS_N_INSNS (3), /* Mult. */ + COSTS_N_INSNS (7), /* Mult_addsub. */ + COSTS_N_INSNS (7), /* Fma. */ + COSTS_N_INSNS (3), /* Addsub. */ + COSTS_N_INSNS (1), /* Fpconst. */ + COSTS_N_INSNS (2), /* Neg. */ + COSTS_N_INSNS (1), /* Compare. */ + COSTS_N_INSNS (3), /* Widen. */ + COSTS_N_INSNS (3), /* Narrow. */ + COSTS_N_INSNS (3), /* Toint. */ + COSTS_N_INSNS (3), /* Fromint. */ + COSTS_N_INSNS (3) /* Roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (30), /* Div. */ + COSTS_N_INSNS (3), /* Mult. */ + COSTS_N_INSNS (7), /* Mult_addsub. */ + COSTS_N_INSNS (7), /* Fma. */ + COSTS_N_INSNS (3), /* Addsub. */ + COSTS_N_INSNS (1), /* Fpconst. */ + COSTS_N_INSNS (2), /* Neg. */ + COSTS_N_INSNS (1), /* Compare. */ + COSTS_N_INSNS (3), /* Widen. */ + COSTS_N_INSNS (3), /* Narrow. */ + COSTS_N_INSNS (3), /* Toint. */ + COSTS_N_INSNS (3), /* Fromint. */ + COSTS_N_INSNS (3) /* Roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* Alu. 
*/ + } +}; + + +#endif /* GCC_AARCH_COST_TABLES_H */ + diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 79e2e87b72b..119dc336fe1 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -129,13 +129,13 @@ ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) -ARM_CORE("cortex-a53", cortexa53, 8A, FL_LDSCHED, cortex) +ARM_CORE("cortex-a53", cortexa53, 8A, FL_LDSCHED, cortex_a53) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) -ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) -ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) +ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, v7m) +ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, v7m) ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m) ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, v6m) ARM_CORE("cortex-m0plus", cortexm0plus, 6M, FL_LDSCHED, v6m) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 5c534407461..0d68f018f4d 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -27,6 +27,10 @@ #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" #include "obstack.h" #include "regs.h" #include "hard-reg-set.h" @@ -266,6 +270,7 @@ static reg_class_t arm_preferred_rename_class (reg_class_t rclass); static unsigned int arm_autovectorize_vector_sizes (void); static int arm_default_branch_cost (bool, bool); static int arm_cortex_a5_branch_cost (bool, bool); +static int arm_cortex_m_branch_cost (bool, bool); static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, const unsigned char *sel); @@ -949,106 +954,9 @@ struct cpu_vec_costs arm_default_vec_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */ +#include "aarch-cost-tables.h" -const struct cpu_cost_table generic_extra_costs = -{ - /* ALU */ - { - 0, /* Arith. */ - 0, /* Logical. */ - 0, /* Shift. */ - COSTS_N_INSNS (1), /* Shift_reg. */ - 0, /* Arith_shift. */ - COSTS_N_INSNS (1), /* Arith_shift_reg. */ - 0, /* Log_shift. */ - COSTS_N_INSNS (1), /* Log_shift_reg. */ - 0, /* Extend. */ - COSTS_N_INSNS (1), /* Extend_arith. */ - 0, /* Bfi. */ - 0, /* Bfx. */ - 0, /* Clz. */ - COSTS_N_INSNS (1), /* non_exec. */ - false /* non_exec_costs_exec. */ - }, - /* MULT SImode */ - { - { - COSTS_N_INSNS (2), /* Simple. */ - COSTS_N_INSNS (1), /* Flag_setting. */ - COSTS_N_INSNS (2), /* Extend. */ - COSTS_N_INSNS (3), /* Add. */ - COSTS_N_INSNS (3), /* Extend_add. */ - COSTS_N_INSNS (8) /* Idiv. */ - }, - /* MULT DImode */ - { - 0, /* Simple (N/A). */ - 0, /* Flag_setting (N/A). */ - COSTS_N_INSNS (2), /* Extend. */ - 0, /* Add (N/A). */ - COSTS_N_INSNS (3), /* Extend_add. */ - 0 /* Idiv (N/A). */ - } - }, - /* LD/ST */ - { - COSTS_N_INSNS (2), /* Load. */ - COSTS_N_INSNS (2), /* Load_sign_extend. */ - COSTS_N_INSNS (3), /* Ldrd. */ - COSTS_N_INSNS (2), /* Ldm_1st. */ - 1, /* Ldm_regs_per_insn_1st. */ - 1, /* Ldm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* Loadf. */ - COSTS_N_INSNS (3), /* Loadd. 
*/ - COSTS_N_INSNS (1), /* Load_unaligned. */ - COSTS_N_INSNS (2), /* Store. */ - COSTS_N_INSNS (3), /* Strd. */ - COSTS_N_INSNS (2), /* Stm_1st. */ - 1, /* Stm_regs_per_insn_1st. */ - 1, /* Stm_regs_per_insn_subsequent. */ - COSTS_N_INSNS (2), /* Storef. */ - COSTS_N_INSNS (3), /* Stored. */ - COSTS_N_INSNS (1) /* Store_unaligned. */ - }, - { - /* FP SFmode */ - { - COSTS_N_INSNS (7), /* Div. */ - COSTS_N_INSNS (2), /* Mult. */ - COSTS_N_INSNS (3), /* Mult_addsub. */ - COSTS_N_INSNS (3), /* Fma. */ - COSTS_N_INSNS (1), /* Addsub. */ - 0, /* Fpconst. */ - 0, /* Neg. */ - 0, /* Compare. */ - 0, /* Widen. */ - 0, /* Narrow. */ - 0, /* Toint. */ - 0, /* Fromint. */ - 0 /* Roundint. */ - }, - /* FP DFmode */ - { - COSTS_N_INSNS (15), /* Div. */ - COSTS_N_INSNS (5), /* Mult. */ - COSTS_N_INSNS (7), /* Mult_addsub. */ - COSTS_N_INSNS (7), /* Fma. */ - COSTS_N_INSNS (3), /* Addsub. */ - 0, /* Fpconst. */ - 0, /* Neg. */ - 0, /* Compare. */ - 0, /* Widen. */ - 0, /* Narrow. */ - 0, /* Toint. */ - 0, /* Fromint. */ - 0 /* Roundint. */ - } - }, - /* Vector */ - { - COSTS_N_INSNS (1) /* Alu. */ - } -}; const struct cpu_cost_table cortexa9_extra_costs = @@ -1357,7 +1265,7 @@ const struct tune_params arm_slowmul_tune = { arm_slowmul_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 3, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1373,7 +1281,7 @@ const struct tune_params arm_fastmul_tune = { arm_fastmul_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1392,7 +1300,7 @@ const struct tune_params arm_strongarm_tune = { arm_fastmul_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 3, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1424,7 +1332,7 @@ const struct tune_params arm_9e_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1440,7 +1348,7 @@ const struct tune_params arm_v6t2_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1457,7 +1365,7 @@ const struct tune_params arm_cortex_tune = { arm_9e_rtx_costs, &generic_extra_costs, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1489,7 +1397,7 @@ const struct tune_params arm_cortex_a15_tune = { arm_9e_rtx_costs, &cortexa15_extra_costs, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 2, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1501,6 +1409,22 @@ const struct tune_params arm_cortex_a15_tune = false /* Prefer Neon for 64-bits bitops. */ }; +const struct tune_params arm_cortex_a53_tune = +{ + arm_9e_rtx_costs, + &cortexa53_extra_costs, + NULL, /* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + /* Branches can be dual-issued on Cortex-A5, so conditional execution is less appealing. Set max_insns_skipped to a low value. */ @@ -1508,7 +1432,7 @@ const struct tune_params arm_cortex_a5_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. 
*/ 1, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1536,13 +1460,36 @@ const struct tune_params arm_cortex_a9_tune = false /* Prefer Neon for 64-bits bitops. */ }; +/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single + cycle to execute each. An LDR from the constant pool also takes two cycles + to execute, but mildly increases pipelining opportunity (consecutive + loads/stores can be pipelined together, saving one cycle), and may also + improve icache utilisation. Hence we prefer the constant pool for such + processors. */ + +const struct tune_params arm_v7m_tune = +{ + arm_9e_rtx_costs, + &generic_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_cortex_m_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ const struct tune_params arm_v6m_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -2506,6 +2453,10 @@ arm_option_override (void) arm_pic_register = pic_register; } + if (TARGET_VXWORKS_RTP + && !global_options_set.x_arm_pic_data_is_text_relative) + arm_pic_data_is_text_relative = 0; + /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ if (fix_cm3_ldrd == 2) { @@ -4785,18 +4736,18 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) if (count == -1 || !index || !TYPE_MAX_VALUE (index) - || !host_integerp (TYPE_MAX_VALUE (index), 1) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) || !TYPE_MIN_VALUE (index) - || !host_integerp (TYPE_MIN_VALUE (index), 1) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) || count < 0) return -1; - count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1) - - tree_low_cst (TYPE_MIN_VALUE (index), 1)); + count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index))); /* There must be no padding. */ - if (!host_integerp (TYPE_SIZE (type), 1) - || (tree_low_cst (TYPE_SIZE (type), 1) + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) != count * GET_MODE_BITSIZE (*modep))) return -1; @@ -4825,8 +4776,8 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) } /* There must be no padding. */ - if (!host_integerp (TYPE_SIZE (type), 1) - || (tree_low_cst (TYPE_SIZE (type), 1) + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) != count * GET_MODE_BITSIZE (*modep))) return -1; @@ -4857,8 +4808,8 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) } /* There must be no padding. */ - if (!host_integerp (TYPE_SIZE (type), 1) - || (tree_low_cst (TYPE_SIZE (type), 1) + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) != count * GET_MODE_BITSIZE (*modep))) return -1; @@ -5992,7 +5943,8 @@ require_pic_register (void) we can't yet emit instructions directly in the final insn stream. Queue the insns on the entry edge, they will be committed after everything else is expanded. 
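The host_integerp/tree_low_cst rewrites running through the aapcs_vfp_sub_candidate hunks above recur in almost every back end touched by this merge; they are the GCC 4.9 spellings of the same width checks. A minimal sketch of the new idiom, assuming only the GCC 4.9 internal tree.h API; the helper name is hypothetical and only illustrates the replacement pattern:

/* Old API (GCC <= 4.8)        New API (GCC 4.9)
   host_integerp (t, 1)   ->   tree_fits_uhwi_p (t)
   tree_low_cst (t, 1)    ->   tree_to_uhwi (t)
   host_integerp (t, 0)   ->   tree_fits_shwi_p (t)
   tree_low_cst (t, 0)    ->   tree_to_shwi (t)  */

static bool
type_size_in_bits_p (const_tree type, unsigned HOST_WIDE_INT *psize)
{
  tree size = TYPE_SIZE (type);

  /* True iff SIZE is an INTEGER_CST representable as an unsigned
     HOST_WIDE_INT; this is the old host_integerp (size, 1).  */
  if (size == NULL_TREE || !tree_fits_uhwi_p (size))
    return false;

  *psize = tree_to_uhwi (size);  /* the old tree_low_cst (size, 1) */
  return true;
}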
*/ - insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); } } } @@ -6022,7 +5974,7 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig))) && NEED_GOT_RELOC - && !TARGET_VXWORKS_RTP) + && arm_pic_data_is_text_relative) insn = arm_pic_static_addr (orig, reg); else { @@ -8815,6 +8767,30 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) call (one insn for -Os) and then one for processing the result. */ #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2)) +#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \ + do \ + { \ + shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \ + if (shift_op != NULL \ + && arm_rtx_shift_left_p (XEXP (x, IDX))) \ + { \ + if (shift_reg) \ + { \ + if (speed_p) \ + *cost += extra_cost->alu.arith_shift_reg; \ + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \ + } \ + else if (speed_p) \ + *cost += extra_cost->alu.arith_shift; \ + \ + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \ + + rtx_cost (XEXP (x, 1 - IDX), \ + OP, 1, speed_p)); \ + return true; \ + } \ + } \ + while (0); + /* RTX costs. Make an estimate of the cost of executing the operation X, which is contained with an operation with code OUTER_CODE. SPEED_P indicates whether the cost desired is the performance cost, @@ -9171,6 +9147,15 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4) { + rtx shift_op, shift_reg; + shift_reg = NULL; + + /* We check both sides of the MINUS for shifter operands since, + unlike PLUS, it's not commutative. */ + + HANDLE_NARROW_SHIFT_ARITH (MINUS, 0) + HANDLE_NARROW_SHIFT_ARITH (MINUS, 1) + /* Slightly disparage, as we might need to widen the result. */ *cost = 1 + COSTS_N_INSNS (1); if (speed_p) @@ -9270,11 +9255,18 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, return false; } + /* Narrow modes can be synthesized in SImode, but the range + of useful sub-operations is limited. Check for shift operations + on one of the operands. Only left shifts can be used in the + narrow modes. */ if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4) { - /* Narrow modes can be synthesized in SImode, but the range - of useful sub-operations is limited. */ + rtx shift_op, shift_reg; + shift_reg = NULL; + + HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) + if (CONST_INT_P (XEXP (x, 1))) { int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, @@ -10393,6 +10385,8 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, } } +#undef HANDLE_NARROW_SHIFT_ARITH + /* RTX costs when optimizing for size. */ static bool arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, @@ -11338,6 +11332,20 @@ arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); } +/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles" + on Cortex-M4, where P varies from 1 to 3 according to some criteria), since + sequences of non-executed instructions in IT blocks probably take the same + amount of time as executed instructions (and the IT instruction itself takes + space in icache). This function was experimentally determined to give good + results on a popular embedded benchmark. 
*/ + +static int +arm_cortex_m_branch_cost (bool speed_p, bool predictable_p) +{ + return (TARGET_32BIT && speed_p) ? 1 + : arm_default_branch_cost (speed_p, predictable_p); +} + static bool fp_consts_inited = false; static REAL_VALUE_TYPE value_fp0; @@ -18379,7 +18387,8 @@ arm_r3_live_at_start_p (void) /* Just look at cfg info, which is still close enough to correct at this point. This gives false positives for broken functions that might use uninitialized data that happens to be allocated in r3, but who cares? */ - return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3); + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), + 3); } /* Compute the number of bytes used to store the static chain register on the @@ -19912,7 +19921,7 @@ any_sibcall_could_use_r3 (void) if (!crtl->tail_call_emit) return false; - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) if (e->flags & EDGE_SIBCALL) { rtx call = BB_END (e->src); @@ -21501,7 +21510,7 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p) { /* See legitimize_pic_address for an explanation of the TARGET_VXWORKS_RTP check. */ - if (TARGET_VXWORKS_RTP + if (!arm_pic_data_is_text_relative || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x))) fputs ("(GOT)", asm_out_file); else @@ -27983,10 +27992,11 @@ arm_dbx_register_number (unsigned int regno) static rtx arm_dwarf_register_span (rtx rtl) { + enum machine_mode mode; unsigned regno; + rtx parts[8]; int nregs; int i; - rtx p; regno = REGNO (rtl); if (!IS_VFP_REGNUM (regno)) @@ -27999,15 +28009,33 @@ arm_dwarf_register_span (rtx rtl) corresponding D register. Until GDB supports this, we shall use the legacy encodings. We also use these encodings for D0-D15 for compatibility with older debuggers. */ - if (VFP_REGNO_OK_FOR_SINGLE (regno)) + mode = GET_MODE (rtl); + if (GET_MODE_SIZE (mode) < 8) return NULL_RTX; - nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; - p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs)); - for (i = 0; i < nregs; i++) - XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i); + if (VFP_REGNO_OK_FOR_SINGLE (regno)) + { + nregs = GET_MODE_SIZE (mode) / 4; + for (i = 0; i < nregs; i += 2) + if (TARGET_BIG_END) + { + parts[i] = gen_rtx_REG (SImode, regno + i + 1); + parts[i + 1] = gen_rtx_REG (SImode, regno + i); + } + else + { + parts[i] = gen_rtx_REG (SImode, regno + i); + parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1); + } + } + else + { + nregs = GET_MODE_SIZE (mode) / 8; + for (i = 0; i < nregs; i++) + parts[i] = gen_rtx_REG (DImode, regno + i); + } - return p; + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts)); } #if ARM_UNWIND_INFO @@ -28850,7 +28878,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) static HOST_WIDE_INT arm_vector_alignment (const_tree type) { - HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0); + HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); if (TARGET_AAPCS_BASED) align = MIN (align, 64); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 1781b75b34b..dbd841ec842 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -568,6 +568,10 @@ extern int prefer_neon_for_64bits; #define NEED_PLT_RELOC 0 #endif +#ifndef TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE +#define TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE 1 +#endif + /* Nonzero if we need to refer to the GOT with a PC-relative offset. 
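The ENTRY_BLOCK_PTR and EXIT_BLOCK_PTR rewrites in the arm.c hunks above, and again in the bfin, frv and i386 files below, all follow the same GCC 4.9 change: the CFG entry and exit blocks now hang off an explicit function. A sketch of the pattern, modelled on the any_sibcall_could_use_r3 hunk; the wrapper function itself is hypothetical:

/* GCC 4.9 drops the bare ENTRY_BLOCK_PTR / EXIT_BLOCK_PTR macros in
   favour of *_FOR_FN forms taking the function, normally cfun.  */
static bool
current_fn_has_sibcall_edge_p (void)
{
  edge e;
  edge_iterator ei;

  /* Formerly: FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      return true;
  return false;
}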
In other words, generate diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 3726201dd4f..a26550a476a 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5170,7 +5170,8 @@ [(set_attr "length" "8,4,8,8") (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits") (set_attr "ce_count" "2") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "type" "multiple,mov_reg,multiple,multiple")] ) (define_insn "extend<mode>di2" diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 9b740386ca3..fa0839a9e12 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -158,6 +158,10 @@ mlong-calls Target Report Mask(LONG_CALLS) Generate call insns as indirect calls, if necessary +mpic-data-is-text-relative +Target Report Var(arm_pic_data_is_text_relative) Init(TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE) +Assume data segments are relative to text segment. + mpic-register= Target RejectNegative Joined Var(arm_pic_register_string) Specify the register to be used for PIC addressing diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md index ccad6207608..5a31a097918 100644 --- a/gcc/config/arm/cortex-a15.md +++ b/gcc/config/arm/cortex-a15.md @@ -158,7 +158,7 @@ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") ;; We include Neon.md here to ensure that the branch can block the Neon units. -(include "cortex-a15-neon.md") +(include "../arm/cortex-a15-neon.md") ;; We lie with calls. They take up all issue slots, and form a block in the ;; pipeline. The result however is available the next cycle. diff --git a/gcc/config/avr/avr-c.c b/gcc/config/avr/avr-c.c index 4e64405a351..2cfb264ad20 100644 --- a/gcc/config/avr/avr-c.c +++ b/gcc/config/avr/avr-c.c @@ -26,6 +26,7 @@ #include "tm_p.h" #include "cpplib.h" #include "tree.h" +#include "stor-layout.h" #include "target.h" #include "c-family/c-common.h" #include "langhooks.h" diff --git a/gcc/config/avr/avr-log.c b/gcc/config/avr/avr-log.c index 87fa14d7006..3d2f54d9707 100644 --- a/gcc/config/avr/avr-log.c +++ b/gcc/config/avr/avr-log.c @@ -24,6 +24,7 @@ #include "tm.h" #include "rtl.h" #include "tree.h" +#include "print-tree.h" #include "output.h" #include "input.h" #include "function.h" diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index e7e1c2f138b..8d98c811d9b 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -32,6 +32,10 @@ #include "flags.h" #include "reload.h" #include "tree.h" +#include "print-tree.h" +#include "calls.h" +#include "stor-layout.h" +#include "stringpool.h" #include "output.h" #include "expr.h" #include "c-family/c-common.h" diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c index bd677f88bab..d7af939ebda 100644 --- a/gcc/config/bfin/bfin.c +++ b/gcc/config/bfin/bfin.c @@ -32,6 +32,8 @@ #include "output.h" #include "insn-attr.h" #include "tree.h" +#include "varasm.h" +#include "calls.h" #include "flags.h" #include "except.h" #include "function.h" @@ -3598,7 +3600,7 @@ hwloop_optimize (hwloop_info loop) if (single_pred_p (bb) && single_pred_edge (bb)->flags & EDGE_FALLTHRU - && single_pred (bb) != ENTRY_BLOCK_PTR) + && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun)) { bb = single_pred (bb); last_insn = BB_END (bb); diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c index a7c36833b45..af310bac8dc 100644 --- a/gcc/config/c6x/c6x.c +++ b/gcc/config/c6x/c6x.c @@ -25,6 +25,10 @@ #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include 
"stringpool.h" #include "insn-flags.h" #include "output.h" #include "insn-attr.h" @@ -52,6 +56,7 @@ #include "hw-doloop.h" #include "regrename.h" #include "dumpfile.h" +#include "gimple-expr.h" /* Table of supported architecture variants. */ typedef struct diff --git a/gcc/config/c6x/predicates.md b/gcc/config/c6x/predicates.md index 1a2fe8f69f5..fbcbdd02457 100644 --- a/gcc/config/c6x/predicates.md +++ b/gcc/config/c6x/predicates.md @@ -210,9 +210,9 @@ t = DECL_SIZE_UNIT (t); else t = TYPE_SIZE_UNIT (TREE_TYPE (t)); - if (t && host_integerp (t, 0)) + if (t && tree_fits_shwi_p (t)) { - size = tree_low_cst (t, 0); + size = tree_to_shwi (t); if (size < 0) size = 0; } diff --git a/gcc/config/cr16/cr16.c b/gcc/config/cr16/cr16.c index 1ac29cc800a..b3972766d5b 100644 --- a/gcc/config/cr16/cr16.c +++ b/gcc/config/cr16/cr16.c @@ -24,6 +24,8 @@ #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stor-layout.h" +#include "calls.h" #include "tm_p.h" #include "regs.h" #include "hard-reg-set.h" diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c index 7432251b950..2d2a108031d 100644 --- a/gcc/config/cris/cris.c +++ b/gcc/config/cris/cris.c @@ -30,6 +30,10 @@ along with GCC; see the file COPYING3. If not see #include "insn-attr.h" #include "flags.h" #include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" +#include "stmt.h" #include "expr.h" #include "except.h" #include "function.h" diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index fc7a9a8f99e..761576df565 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -32,6 +32,9 @@ along with GCC; see the file COPYING3. If not see #include "insn-attr.h" #include "flags.h" #include "tree.h" +#include "stringpool.h" +#include "varasm.h" +#include "stor-layout.h" #include "expr.h" #include "reload.h" #include "function.h" @@ -46,6 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "debug.h" #include "obstack.h" #include "gimple.h" +#include "gimplify.h" #include "lto-streamer.h" /* Darwin supports a feature called fix-and-continue, which is used @@ -1508,7 +1512,7 @@ machopic_select_section (tree decl, zsize = (DECL_P (decl) && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) - && tree_low_cst (DECL_SIZE_UNIT (decl), 1) == 0); + && tree_to_uhwi (DECL_SIZE_UNIT (decl)) == 0); one = DECL_P (decl) && TREE_CODE (decl) == VAR_DECL @@ -1649,7 +1653,7 @@ machopic_select_section (tree decl, static bool warned_objc_46 = false; /* We shall assert that zero-sized objects are an error in ObjC meta-data. */ - gcc_assert (tree_low_cst (DECL_SIZE_UNIT (decl), 1) != 0); + gcc_assert (tree_to_uhwi (DECL_SIZE_UNIT (decl)) != 0); /* ??? This mechanism for determining the metadata section is broken when LTO is in use, since the frontend that generated @@ -2186,7 +2190,7 @@ darwin_asm_declare_object_name (FILE *file, machopic_define_symbol (DECL_RTL (decl)); } - size = tree_low_cst (DECL_SIZE_UNIT (decl), 1); + size = tree_to_uhwi (DECL_SIZE_UNIT (decl)); #ifdef DEBUG_DARWIN_MEM_ALLOCATORS fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d" diff --git a/gcc/config/epiphany/epiphany.c b/gcc/config/epiphany/epiphany.c index fd4c01c49a4..c264cdaee78 100644 --- a/gcc/config/epiphany/epiphany.c +++ b/gcc/config/epiphany/epiphany.c @@ -23,6 +23,10 @@ along with GCC; see the file COPYING3. 
If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "stringpool.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" @@ -2758,11 +2762,11 @@ epiphany_special_round_type_align (tree type, unsigned computed, continue; offset = bit_position (field); size = DECL_SIZE (field); - if (!host_integerp (offset, 1) || !host_integerp (size, 1) - || TREE_INT_CST_LOW (offset) >= try_align - || TREE_INT_CST_LOW (size) >= try_align) + if (!tree_fits_uhwi_p (offset) || !tree_fits_uhwi_p (size) + || tree_to_uhwi (offset) >= try_align + || tree_to_uhwi (size) >= try_align) return try_align; - total = TREE_INT_CST_LOW (offset) + TREE_INT_CST_LOW (size); + total = tree_to_uhwi (offset) + tree_to_uhwi (size); if (total > max) max = total; } @@ -2785,7 +2789,7 @@ epiphany_adjust_field_align (tree field, unsigned computed) { tree elmsz = TYPE_SIZE (TREE_TYPE (TREE_TYPE (field))); - if (!host_integerp (elmsz, 1) || tree_low_cst (elmsz, 1) >= 32) + if (!tree_fits_uhwi_p (elmsz) || tree_to_uhwi (elmsz) >= 32) return 64; } return computed; diff --git a/gcc/config/fr30/fr30.c b/gcc/config/fr30/fr30.c index 4a45feafd04..caa50d9e691 100644 --- a/gcc/config/fr30/fr30.c +++ b/gcc/config/fr30/fr30.c @@ -33,6 +33,8 @@ #include "flags.h" #include "recog.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" #include "output.h" #include "expr.h" #include "obstack.h" diff --git a/gcc/config/frv/frv.c b/gcc/config/frv/frv.c index bcd55111434..a5eb2c1c844 100644 --- a/gcc/config/frv/frv.c +++ b/gcc/config/frv/frv.c @@ -23,6 +23,9 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "rtl.h" #include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "stringpool.h" #include "regs.h" #include "hard-reg-set.h" #include "insn-config.h" @@ -8024,7 +8027,7 @@ frv_optimize_membar_global (basic_block bb, struct frv_io *first_io, /* We need to keep the membar if there is an edge to the exit block. */ FOR_EACH_EDGE (succ, ei, bb->succs) /* for (succ = bb->succ; succ != 0; succ = succ->succ_next) */ - if (succ->dest == EXIT_BLOCK_PTR) + if (succ->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) return; /* Work out the union of all successor blocks. */ diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c index 69f37fd02d6..f0ebca30f2c 100644 --- a/gcc/config/h8300/h8300.c +++ b/gcc/config/h8300/h8300.c @@ -25,6 +25,10 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "stringpool.h" #include "regs.h" #include "hard-reg-set.h" #include "insn-config.h" diff --git a/gcc/config/i386/bdver3.md b/gcc/config/i386/bdver3.md index 421a3d1b30e..019e9291b1b 100644 --- a/gcc/config/i386/bdver3.md +++ b/gcc/config/i386/bdver3.md @@ -16,19 +16,19 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. ;; -;; AMD bdver3 Scheduling +;; AMD bdver3 and bdver4 Scheduling ;; -;; The bdver3 contains three pipelined FP units and two integer units. -;; Fetching and decoding logic is different from previous fam15 processors. -;; Fetching is done every two cycles rather than every cycle and -;; two decode units are available. The decode units therefore decode +;; The bdver3 and bdver4 contains three pipelined FP units and two integer +;; units. ;; Fetching and decoding logic is different from previous fam15 +;; processors. 
Fetching is done every two cycles rather than every cycle +;; and two decode units are available. The decode units therefore decode ;; four instructions in two cycles. ;; ;; The load/store queue unit is not attached to the schedulers but ;; communicates with all the execution units separately instead. ;; -;; bdver3 belong to fam15 processors. We use the same insn attribute -;; that was used for bdver1 decoding scheme. +;; bdver3 and bdver4 belong to fam15 processors. We use the same insn +;; attribute that was used for bdver1 decoding scheme. (define_automaton "bdver3,bdver3_ieu,bdver3_load,bdver3_fp,bdver3_agu") @@ -102,90 +102,90 @@ ;; Jump instructions are executed in the branch unit completely transparent to us. (define_insn_reservation "bdver3_call" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "call,callv")) "bdver3-double,(bdver3-agu | bdver3-ieu),nothing") ;; PUSH mem is double path. (define_insn_reservation "bdver3_push" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "push")) "bdver3-direct,bdver3-ieu,bdver3-store") ;; POP r16/mem are double path. (define_insn_reservation "bdver3_pop" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "pop")) "bdver3-direct,bdver3-ivector") ;; LEAVE no latency info so far, assume same with amdfam10. (define_insn_reservation "bdver3_leave" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "leave")) "bdver3-vector,bdver3-ivector") ;; LEA executes in AGU unit with 1 cycle latency on BDVER3. (define_insn_reservation "bdver3_lea" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "lea")) "bdver3-direct,bdver3-ieu") ;; MUL executes in special multiplier unit attached to IEU1. (define_insn_reservation "bdver3_imul_DI" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "none,unknown")))) "bdver3-direct,bdver3-ieu1") (define_insn_reservation "bdver3_imul" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (eq_attr "memory" "none,unknown"))) "bdver3-direct,bdver3-ieu1") (define_insn_reservation "bdver3_imul_mem_DI" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "load,both")))) "bdver3-direct,bdver3-load,bdver3-ieu1") (define_insn_reservation "bdver3_imul_mem" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (eq_attr "memory" "load,both"))) "bdver3-direct,bdver3-load,bdver3-ieu1") (define_insn_reservation "bdver3_str" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "str") (eq_attr "memory" "load,both,store"))) "bdver3-vector,bdver3-load,bdver3-ivector") ;; Integer instructions. 
(define_insn_reservation "bdver3_idirect" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "bdver3-direct,(bdver3-ieu|bdver3-agu)") (define_insn_reservation "bdver3_ivector" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "bdver3-vector,bdver3-ivector") (define_insn_reservation "bdver3_idirect_loadmov" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-load") (define_insn_reservation "bdver3_idirect_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-load,bdver3-ieu") (define_insn_reservation "bdver3_idirect_movstore" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imov") (eq_attr "memory" "store"))) "bdver3-direct,bdver3-ieu,bdver3-store") (define_insn_reservation "bdver3_idirect_both" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "both")))) @@ -193,7 +193,7 @@ bdver3-ieu,bdver3-store, bdver3-store") (define_insn_reservation "bdver3_idirect_store" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "store")))) @@ -201,108 +201,108 @@ bdver3-store") ;; BDVER3 floating point units. 
(define_insn_reservation "bdver3_fldxf" 13 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "load") (eq_attr "mode" "XF")))) "bdver3-vector,bdver3-fpload2,bdver3-fvector*9") (define_insn_reservation "bdver3_fld" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fstxf" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "store,both") (eq_attr "mode" "XF")))) "bdver3-vector,(bdver3-fpsched+bdver3-agu),(bdver3-store2+(bdver3-fvector*6))") (define_insn_reservation "bdver3_fst" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))) "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)") (define_insn_reservation "bdver3_fist" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fistp,fisttp")) "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)") (define_insn_reservation "bdver3_fmov_bdver3" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fmov")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fadd_load" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fop") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fadd" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fop")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fmul_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmul") (eq_attr "memory" "load"))) "bdver3-double,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fmul" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fmul")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fsgn" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fsgn")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fdiv_load" 42 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fdiv") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fdiv" 42 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fdiv")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fpspc_load" 143 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fpspc") (eq_attr "memory" "load"))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_fcmov_load" 17 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmov") (eq_attr "memory" "load"))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_fcmov" 15 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fcmov")) "bdver3-vector,bdver3-fpsched,bdver3-fvector") (define_insn_reservation "bdver3_fcomi_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmp") (and (eq_attr 
"bdver1_decode" "double") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_fcomi" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "double") (eq_attr "type" "fcmp"))) "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_fcom_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmp") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fcom" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fcmp")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fxch" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fxch")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") ;; SSE loads. (define_insn_reservation "bdver3_ssevector_avx128_unaligned_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "movu" "1") @@ -310,162 +310,162 @@ (eq_attr "memory" "load")))))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssevector_avx256_unaligned_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "movu" "1") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "load"))))) "bdver3-double,bdver3-fpload") (define_insn_reservation "bdver3_ssevector_sse128_unaligned_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "movu" "1") (and (eq_attr "mode" "V4SF,V2DF") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssevector_avx128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssevector_avx256_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssevector_sse128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssescalar_movq_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "DI") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssescalar_vmovss_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "mode" "SF") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssescalar_sse128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload, bdver3-ffma") (define_insn_reservation "bdver3_mmxsse_load" 4 - (and (eq_attr "cpu" 
"bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload, bdver3-fmal") ;; SSE stores. (define_insn_reservation "bdver3_sse_store_avx256" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "store,both")))) "bdver3-double,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)") (define_insn_reservation "bdver3_sse_store" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "store,both")))) "bdver3-direct,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)") (define_insn_reservation "bdver3_mmxsse_store_short" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "store,both"))) "bdver3-direct,bdver3-fpsched,(bdver3-fsto+bdver3-store)") ;; Register moves. (define_insn_reservation "bdver3_ssevector_avx256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,bdver3-fmal") (define_insn_reservation "bdver3_movss_movsd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_mmxssemov" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmal") ;; SSE logs. 
(define_insn_reservation "bdver3_sselog_load_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_sselog_256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (eq_attr "mode" "V8SF"))) "bdver3-double,bdver3-fpsched,bdver3-fmal") (define_insn_reservation "bdver3_sselog_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fxbar") (define_insn_reservation "bdver3_sselog" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "sselog,sselog1")) "bdver3-direct,bdver3-fpsched,bdver3-fxbar") ;; SSE Shuffles (define_insn_reservation "bdver3_sseshuf_load_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf,sseshuf1") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf,sseshuf1") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf_256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf") (eq_attr "mode" "V8SF"))) "bdver3-double,bdver3-fpsched,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "sseshuf,sseshuf1")) "bdver3-direct,bdver3-fpsched,bdver3-fpshuf") ;; PCMP actually executes in FMAL. (define_insn_reservation "bdver3_ssecmp_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecmp") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssecmp" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "ssecmp")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_ssecomi_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "load"))) "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_ssecomi" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "ssecomi")) "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)") @@ -474,7 +474,7 @@ ;; 256 bit conversion. (define_insn_reservation "bdver3_vcvtX2Y_avx256_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (ior (ior (match_operand:V4DF 0 "register_operand") @@ -485,7 +485,7 @@ (match_operand:V8SI 1 "nonimmediate_operand"))))))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_vcvtX2Y_avx256" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (ior (ior (match_operand:V4DF 0 "register_operand") @@ -497,40 +497,40 @@ "bdver3-vector,bdver3-fpsched,bdver3-fvector") ;; CVTSS2SD, CVTSD2SS. 
(define_insn_reservation "bdver3_ssecvt_cvtss2sd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtss2sd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,bdver3-fcvt") ;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ. (define_insn_reservation "bdver3_sseicvt_cvtsi2sd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_sseicvt_cvtsi2sd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(nothing | bdver3-fcvt)") ;; CVTPD2PS. (define_insn_reservation "bdver3_ssecvt_cvtpd2ps_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 0 "register_operand") (match_operand:V2DF 1 "nonimmediate_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtpd2ps" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 0 "register_operand") @@ -538,7 +538,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTPI2PS, CVTDQ2PS. (define_insn_reservation "bdver3_ssecvt_cvtdq2ps_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 0 "register_operand") @@ -546,7 +546,7 @@ (match_operand:V4SI 1 "nonimmediate_operand")))))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtdq2ps" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 0 "register_operand") @@ -555,14 +555,14 @@ "bdver3-direct,bdver3-fpsched,bdver3-fcvt") ;; CVTDQ2PD. (define_insn_reservation "bdver3_ssecvt_cvtdq2pd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") (match_operand:V4SI 1 "nonimmediate_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtdq2pd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 0 "register_operand") @@ -570,7 +570,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTPS2PD, CVTPI2PD. 
(define_insn_reservation "bdver3_ssecvt_cvtps2pd_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") @@ -578,7 +578,7 @@ (match_operand:V4SF 1 "nonimmediate_operand")))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtps2pd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") @@ -587,27 +587,27 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ. (define_insn_reservation "bdver3_ssecvt_cvtsX2si_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fsto)") (define_insn_reservation "bdver3_ssecvt_cvtsX2si" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fsto)") ;; CVTPD2PI, CVTTPD2PI. (define_insn_reservation "bdver3_ssecvt_cvtpd2pi_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 1 "nonimmediate_operand") (match_operand:V2SI 0 "register_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)") (define_insn_reservation "bdver3_ssecvt_cvtpd2pi" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 1 "nonimmediate_operand") @@ -615,14 +615,14 @@ "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)") ;; CVTPD2DQ, CVTTPD2DQ. (define_insn_reservation "bdver3_ssecvt_cvtpd2dq_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 1 "nonimmediate_operand") (match_operand:V4SI 0 "register_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)") (define_insn_reservation "bdver3_ssecvt_cvtpd2dq" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 1 "nonimmediate_operand") @@ -630,7 +630,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)") ;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ. (define_insn_reservation "bdver3_ssecvt_cvtps2pi_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 1 "nonimmediate_operand") @@ -638,7 +638,7 @@ (match_operand: V4SI 0 "register_operand")))))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtps2pi" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 1 "nonimmediate_operand") @@ -648,100 +648,100 @@ ;; SSE MUL, ADD, and MULADD. 
(define_insn_reservation "bdver3_ssemuladd_load_256" 11 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd_load" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_sseimul_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseimul") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fmma") (define_insn_reservation "bdver3_sseimul" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseimul") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmma") (define_insn_reservation "bdver3_sseiadd_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseiadd") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_sseiadd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseiadd") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmal") ;; SSE DIV: no throughput information (assume same as amdfam10). 
(define_insn_reservation "bdver3_ssediv_double_load_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V4DF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V4DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_load_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_256" 24 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double_load" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "DF,V2DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "DF,V2DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_load" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,V4SF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single" 24 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,V4SF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_sseins" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseins") (eq_attr "mode" "TI"))) "bdver3-direct,bdver3-fpsched,bdver3-fxbar") diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 823f92da8c3..a4a1f40548a 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -550,6 +550,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) processor = PROCESSOR_GEODE; else if (has_movbe) processor = PROCESSOR_BTVER2; + else if (has_avx2) + processor = PROCESSOR_BDVER4; else if (has_xsaveopt) processor = PROCESSOR_BDVER3; else if (has_bmi) @@ -772,6 +774,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) case PROCESSOR_BDVER3: cpu = "bdver3"; break; + case PROCESSOR_BDVER4: + cpu = "bdver4"; + break; case PROCESSOR_BTVER1: cpu = "btver1"; break; diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 1c053b1e51e..18c2929d10f 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -117,6 +117,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__bdver3"); def_or_undef (parse_in, "__bdver3__"); break; + case PROCESSOR_BDVER4: + def_or_undef 
(parse_in, "__bdver4"); + def_or_undef (parse_in, "__bdver4__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__btver1"); def_or_undef (parse_in, "__btver1__"); @@ -224,6 +228,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_BDVER3: def_or_undef (parse_in, "__tune_bdver3__"); break; + case PROCESSOR_BDVER4: + def_or_undef (parse_in, "__tune_bdver4__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__tune_btver1__"); break; diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index fdf9d5804df..bceb8f2ef6d 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -58,9 +58,9 @@ extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx); extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode); extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode); -extern bool ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx); -extern bool ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx); extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx); +extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx, + rtx, rtx, rtx, rtx, bool); extern bool constant_address_p (rtx); extern bool legitimate_pic_operand_p (rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8b8cdfae681..c818cb9dc4a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -23,6 +23,11 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stringpool.h" +#include "attribs.h" +#include "calls.h" +#include "stor-layout.h" +#include "varasm.h" #include "tm_p.h" #include "regs.h" #include "hard-reg-set.h" @@ -48,6 +53,7 @@ along with GCC; see the file COPYING3. If not see #include "reload.h" #include "cgraph.h" #include "gimple.h" +#include "gimplify.h" #include "dwarf2.h" #include "df.h" #include "tm-constrs.h" @@ -1160,6 +1166,92 @@ struct processor_costs bdver3_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* BDVER4 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs bdver4_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver4_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +struct processor_costs bdver4_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + 16, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + bdver4_memcpy, + bdver4_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 2, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* BTVER1 has optimized REP instruction for medium sized blocks, but for very small blocks it is better to use loop. For large blocks, libcall can do nontemporary accesses and beat inline considerably. 
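For scale when reading bdver4_cost above: COSTS_N_INSNS converts an instruction count into the unit the RTX cost hooks trade in, four units per instruction, so sub-instruction costs can still be expressed as integers. Its definition in rtl.h is just:

#define COSTS_N_INSNS(N) ((N) * 4)

so the COSTS_N_INSNS (42) entry for FDIV is 168 cost units against 4 for a plain add.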
*/ @@ -1849,9 +1941,10 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_BDVER1 (1<<PROCESSOR_BDVER1) #define m_BDVER2 (1<<PROCESSOR_BDVER2) #define m_BDVER3 (1<<PROCESSOR_BDVER3) +#define m_BDVER4 (1<<PROCESSOR_BDVER4) #define m_BTVER1 (1<<PROCESSOR_BTVER1) #define m_BTVER2 (1<<PROCESSOR_BTVER2) -#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3) +#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4) #define m_BTVER (m_BTVER1 | m_BTVER2) #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER) @@ -2309,6 +2402,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {&bdver1_cost, 16, 10, 16, 7, 11}, {&bdver2_cost, 16, 10, 16, 7, 11}, {&bdver3_cost, 16, 10, 16, 7, 11}, + {&bdver4_cost, 16, 10, 16, 7, 11}, {&btver1_cost, 16, 10, 16, 7, 11}, {&btver2_cost, 16, 10, 16, 7, 11}, {&atom_cost, 16, 15, 16, 7, 16}, @@ -2346,6 +2440,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = "bdver1", "bdver2", "bdver3", + "bdver4", "btver1", "btver2" }; @@ -3106,6 +3201,13 @@ ix86_option_override_internal (bool main_args_p, | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, + {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 + | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR + | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW @@ -3810,7 +3912,7 @@ ix86_option_override_internal (bool main_args_p, functions. */ else if (fast_math_flags_set_p (&global_options) && TARGET_SSE2_P (opts->x_ix86_isa_flags)) - ix86_fpmath = FPMATH_SSE; + opts->x_ix86_fpmath = FPMATH_SSE; else opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags); @@ -3974,10 +4076,10 @@ ix86_option_override_internal (bool main_args_p, if (flag_expensive_optimizations && !(opts_set->x_target_flags & MASK_VZEROUPPER)) opts->x_target_flags |= MASK_VZEROUPPER; - if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL] + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; - if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL] + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; /* Enable 128-bit AVX instruction generation @@ -5491,7 +5593,7 @@ ix86_eax_live_at_start_p (void) to correct at this point. This gives false positives for broken functions that might use uninitialized data that happens to be allocated in eax, but who cares? 
*/ - return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0); + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); } static bool @@ -6173,7 +6275,7 @@ classify_argument (enum machine_mode mode, const_tree type, for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; i < ((int_bit_position (field) + (bit_offset % 64)) - + tree_low_cst (DECL_SIZE (field), 0) + + tree_to_shwi (DECL_SIZE (field)) + 63) / 8 / 8; i++) classes[i] = merge_classes (X86_64_INTEGER_CLASS, @@ -9199,7 +9301,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame) Recompute the value as needed. Do not recompute when amount of registers didn't change as reload does multiple calls to the function and does not expect the decision to change within single iteration. */ - else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR) + else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)) && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) { int count = frame->nregs; @@ -11288,7 +11390,7 @@ ix86_expand_epilogue (int style) /* Leave results in shorter dependency chains on CPUs that are able to grok it fast. */ else if (TARGET_USE_LEAVE - || optimize_bb_for_size_p (EXIT_BLOCK_PTR) + || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) || !cfun->machine->use_fast_prologue_epilogue) ix86_emit_leave (); else @@ -11772,30 +11874,6 @@ ix86_live_on_entry (bitmap regs) } } -/* Determine if op is suitable SUBREG RTX for address. */ - -static bool -ix86_address_subreg_operand (rtx op) -{ - enum machine_mode mode; - - if (!REG_P (op)) - return false; - - mode = GET_MODE (op); - - if (GET_MODE_CLASS (mode) != MODE_INT) - return false; - - /* Don't allow SUBREGs that span more than a word. It can lead to spill - failures when the register is one word out of a two word structure. */ - if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) - return false; - - /* Allow only SUBREGs of non-eliminable hard registers. */ - return register_no_elim_operand (op, mode); -} - /* Extract the parts of an RTL expression that is a valid memory address for an instruction. Return 0 if the structure of the address is grossly off. Return -1 if the address contains ASHIFT, so it is not @@ -11852,7 +11930,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) base = addr; else if (GET_CODE (addr) == SUBREG) { - if (ix86_address_subreg_operand (SUBREG_REG (addr))) + if (REG_P (SUBREG_REG (addr))) base = addr; else return 0; @@ -11916,7 +11994,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) break; case SUBREG: - if (!ix86_address_subreg_operand (SUBREG_REG (op))) + if (!REG_P (SUBREG_REG (op))) return 0; /* FALLTHRU */ @@ -11961,19 +12039,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) scale = 1 << scale; retval = -1; } - else if (CONST_INT_P (addr)) - { - if (!x86_64_immediate_operand (addr, VOIDmode)) - return 0; - - /* Constant addresses are sign extended to 64bit, we have to - prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */ - if (TARGET_X32 - && val_signbit_known_set_p (SImode, INTVAL (addr))) - return 0; - - disp = addr; - } else disp = addr; /* displacement */ @@ -11982,18 +12047,12 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) if (REG_P (index)) ; else if (GET_CODE (index) == SUBREG - && ix86_address_subreg_operand (SUBREG_REG (index))) + && REG_P (SUBREG_REG (index))) ; else return 0; } -/* Address override works only on the (%reg) part of %fs:(%reg). 
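The ENTRY_BLOCK_PTR / EXIT_BLOCK_PTR rewrites running through these hunks are part of a tree-wide change that names the function explicitly in CFG accessors instead of implicitly reading the global cfun. Schematically, with heavily simplified structures standing in for GCC's real basic_block and control_flow_graph types:

    struct basic_block_def;
    struct control_flow_graph
    {
      struct basic_block_def *entry_block, *exit_block;
    };
    struct function { struct control_flow_graph *cfg; };

    // New style: the function whose CFG is meant appears at the call
    // site, e.g. ENTRY_BLOCK_PTR_FOR_FN (cfun) for the current one,
    // which eases passes that walk more than one function at a time.
    #define ENTRY_BLOCK_PTR_FOR_FN(fn) ((fn)->cfg->entry_block)
    #define EXIT_BLOCK_PTR_FOR_FN(fn)  ((fn)->cfg->exit_block)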
*/ - if (seg != SEG_DEFAULT - && ((base && GET_MODE (base) != word_mode) - || (index && GET_MODE (index) != word_mode))) - return 0; - /* Extract the integral value of scale. */ if (scale_rtx) { @@ -12510,6 +12569,45 @@ ix86_legitimize_reload_address (rtx x, return false; } +/* Determine if op is suitable RTX for an address register. + Return naked register if a register or a register subreg is + found, otherwise return NULL_RTX. */ + +static rtx +ix86_validate_address_register (rtx op) +{ + enum machine_mode mode = GET_MODE (op); + + /* Only SImode or DImode registers can form the address. */ + if (mode != SImode && mode != DImode) + return NULL_RTX; + + if (REG_P (op)) + return op; + else if (GET_CODE (op) == SUBREG) + { + rtx reg = SUBREG_REG (op); + + if (!REG_P (reg)) + return NULL_RTX; + + mode = GET_MODE (reg); + + /* Don't allow SUBREGs that span more than a word. It can + lead to spill failures when the register is one word out + of a two word structure. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return NULL_RTX; + + /* Allow only SUBREGs of non-eliminable hard registers. */ + if (register_no_elim_operand (reg, mode)) + return reg; + } + + /* Op is not a register. */ + return NULL_RTX; +} + /* Recognizes RTL expressions that are valid memory addresses for an instruction. The MODE argument is the machine mode for the MEM expression that wants to use this address. @@ -12525,6 +12623,7 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, struct ix86_address parts; rtx base, index, disp; HOST_WIDE_INT scale; + enum ix86_address_seg seg; if (ix86_decompose_address (addr, &parts) <= 0) /* Decomposition failed. */ @@ -12534,21 +12633,14 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, index = parts.index; disp = parts.disp; scale = parts.scale; + seg = parts.seg; /* Validate base register. */ if (base) { - rtx reg; + rtx reg = ix86_validate_address_register (base); - if (REG_P (base)) - reg = base; - else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base))) - reg = SUBREG_REG (base); - else - /* Base is not a register. */ - return false; - - if (GET_MODE (base) != SImode && GET_MODE (base) != DImode) + if (reg == NULL_RTX) return false; if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) @@ -12560,17 +12652,9 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, /* Validate index register. */ if (index) { - rtx reg; + rtx reg = ix86_validate_address_register (index); - if (REG_P (index)) - reg = index; - else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index))) - reg = SUBREG_REG (index); - else - /* Index is not a register. */ - return false; - - if (GET_MODE (index) != SImode && GET_MODE (index) != DImode) + if (reg == NULL_RTX) return false; if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) @@ -12584,6 +12668,12 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, && GET_MODE (base) != GET_MODE (index)) return false; + /* Address override works only on the (%reg) part of %fs:(%reg). */ + if (seg != SEG_DEFAULT + && ((base && GET_MODE (base) != word_mode) + || (index && GET_MODE (index) != word_mode))) + return false; + /* Validate scale factor. */ if (scale != 1) { @@ -12705,6 +12795,12 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, && !x86_64_immediate_operand (disp, VOIDmode)) /* Displacement is out of range. */ return false; + /* In x32 mode, constant addresses are sign extended to 64bit, so + we have to prevent addresses from 0x80000000 to 0xffffffff. 
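A concrete instance of the x32 hazard this comment describes: a 32-bit displacement with bit 31 set is sign-extended when the hardware forms the 64-bit effective address, which takes it outside the 32-bit address space that x32 guarantees. A plain-arithmetic demonstration, independent of GCC internals:

    #include <cstdint>
    #include <cstdio>

    int main ()
    {
      int32_t disp = (int32_t) 0x80000000u;  // sign bit set
      int64_t ea = disp;                     // what address formation computes
      // Prints 0xffffffff80000000, far outside the low 4 GiB.
      std::printf ("%#llx\n", (unsigned long long) ea);
      return 0;
    }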
*/
+  else if (TARGET_X32 && !(index || base)
+	   && CONST_INT_P (disp)
+	   && val_signbit_known_set_p (SImode, INTVAL (disp)))
+    return false;
     }
 
   /* Everything looks valid.  */
@@ -14855,6 +14951,11 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	  /* We do not want to print value of the operand.  */
 	  return;
 
+	case 'N':
+	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+	    fputs ("{z}", file);
+	  return;
+
 	case '*':
 	  if (ASSEMBLER_DIALECT == ASM_ATT)
 	    putc ('*', file);
@@ -23615,10 +23716,12 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
       with specified algorithm.  */
-static bool
+bool
 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
-			   rtx align_exp, rtx expected_align_exp,
-			   rtx expected_size_exp, bool issetmem)
+			   rtx align_exp, rtx expected_align_exp,
+			   rtx expected_size_exp, rtx min_size_exp,
+			   rtx max_size_exp, rtx probable_max_size_exp,
+			   bool issetmem)
 {
   rtx destreg;
   rtx srcreg = NULL;
@@ -23642,6 +23745,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
   /* TODO: Once value ranges are available, fill in proper data.  */
   unsigned HOST_WIDE_INT min_size = 0;
   unsigned HOST_WIDE_INT max_size = -1;
+  unsigned HOST_WIDE_INT probable_max_size = -1;
   bool misaligned_prologue_used = false;
 
   if (CONST_INT_P (align_exp))
@@ -23657,9 +23761,19 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
     align = MEM_ALIGN (dst) / BITS_PER_UNIT;
 
   if (CONST_INT_P (count_exp))
-    min_size = max_size = count = expected_size = INTVAL (count_exp);
-  if (CONST_INT_P (expected_size_exp) && count == 0)
-    expected_size = INTVAL (expected_size_exp);
+    min_size = max_size = probable_max_size = count = expected_size
+      = INTVAL (count_exp);
+  else
+    {
+      if (min_size_exp)
+	min_size = INTVAL (min_size_exp);
+      if (max_size_exp)
+	max_size = INTVAL (max_size_exp);
+      if (probable_max_size_exp)
+	probable_max_size = INTVAL (probable_max_size_exp);
+      if (CONST_INT_P (expected_size_exp) && count == 0)
+	expected_size = INTVAL (expected_size_exp);
+    }
 
   /* Make sure we don't need to care about overflow later on.  */
   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
@@ -23667,7 +23781,8 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
 
   /* Step 0: Decide on preferred algorithm, desired alignment and
      size of chunks to be copied by main loop.  */
-  alg = decide_alg (count, expected_size, min_size, max_size, issetmem,
+  alg = decide_alg (count, expected_size, min_size, probable_max_size,
+		    issetmem,
 		    issetmem && val_exp == const0_rtx,
 		    &dynamic_check, &noalign);
   if (alg == libcall)
@@ -24062,24 +24177,6 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
   return true;
 }
 
-/* Wrapper for ix86_expand_set_or_movmem for memcpy case.  */
-bool
-ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
-		    rtx expected_align_exp, rtx expected_size_exp)
-{
-  return ix86_expand_set_or_movmem (dst, src, count_exp, NULL, align_exp,
-				    expected_align_exp, expected_size_exp, false);
-}
-
-/* Wrapper for ix86_expand_set_or_movmem for memset case.
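With the two wrappers above deleted, the movmem/setmem expanders in the i386.md hunks further down call ix86_expand_set_or_movmem directly, passing NULL for the side that does not apply and threading through the three new size hints. Roughly, the two call shapes (operand names here are illustrative placeholders for the expander's operands[0..8]):

    // memcpy-style expansion: no fill value, issetmem is false.
    ok = ix86_expand_set_or_movmem (dst, src, count, NULL,
                                    align, expected_align, expected_size,
                                    min_size, max_size, probable_max_size,
                                    false);

    // memset-style expansion: no source block, issetmem is true.
    ok = ix86_expand_set_or_movmem (dst, NULL, count, val,
                                    align, expected_align, expected_size,
                                    min_size, max_size, probable_max_size,
                                    true);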
*/ -bool -ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, - rtx expected_align_exp, rtx expected_size_exp) -{ - return ix86_expand_set_or_movmem (dst, NULL, count_exp, val_exp, align_exp, - expected_align_exp, expected_size_exp, true); -} - /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb @@ -24858,6 +24955,7 @@ ix86_issue_rate (void) case PROCESSOR_BDVER1: case PROCESSOR_BDVER2: case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: case PROCESSOR_CORE2: case PROCESSOR_COREI7: case PROCESSOR_COREI7_AVX: @@ -25118,6 +25216,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) case PROCESSOR_BDVER1: case PROCESSOR_BDVER2: case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: case PROCESSOR_BTVER1: case PROCESSOR_BTVER2: case PROCESSOR_GENERIC: @@ -25237,6 +25336,7 @@ ia32_multipass_dfa_lookahead (void) case PROCESSOR_BDVER1: case PROCESSOR_BDVER2: case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: /* We use lookahead value 4 for BD both before and after reload schedules. Plan is to have value 8 included for O3. */ return 4; @@ -29235,7 +29335,7 @@ ix86_builtin_tm_load (tree type) { if (TREE_CODE (type) == VECTOR_TYPE) { - switch (tree_low_cst (TYPE_SIZE (type), 1)) + switch (tree_to_uhwi (TYPE_SIZE (type))) { case 64: return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64); @@ -29255,7 +29355,7 @@ ix86_builtin_tm_store (tree type) { if (TREE_CODE (type) == VECTOR_TYPE) { - switch (tree_low_cst (TYPE_SIZE (type), 1)) + switch (tree_to_uhwi (TYPE_SIZE (type))) { case 64: return builtin_decl_explicit (BUILT_IN_TM_STORE_M64); @@ -29738,7 +29838,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, make_edge (bb1, bb3, EDGE_FALSE_VALUE); remove_edge (e23); - make_edge (bb2, EXIT_BLOCK_PTR, 0); + make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); pop_cfun (); @@ -30608,7 +30708,6 @@ ix86_generate_version_dispatcher_body (void *node_p) { tree resolver_decl; basic_block empty_bb; - vec<tree> fn_ver_vec = vNULL; tree default_ver_decl; struct cgraph_node *versn; struct cgraph_node *node; @@ -30638,7 +30737,7 @@ ix86_generate_version_dispatcher_body (void *node_p) push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); - fn_ver_vec.create (2); + stack_vec<tree, 2> fn_ver_vec; for (versn_info = node_version_info->next; versn_info; versn_info = versn_info->next) @@ -30656,7 +30755,6 @@ ix86_generate_version_dispatcher_body (void *node_p) } dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); - fn_ver_vec.release (); rebuild_cgraph_edges (); pop_cfun (); return resolver_decl; @@ -30772,7 +30870,8 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMDFAM10H_ISTANBUL, M_AMDFAM15H_BDVER1, M_AMDFAM15H_BDVER2, - M_AMDFAM15H_BDVER3 + M_AMDFAM15H_BDVER3, + M_AMDFAM15H_BDVER4 }; static struct _arch_names_table @@ -30799,6 +30898,7 @@ fold_builtin_cpu (tree fndecl, tree *args) {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, {"bdver3", M_AMDFAM15H_BDVER3}, + {"bdver4", M_AMDFAM15H_BDVER4}, }; static struct _isa_names_table @@ -32475,7 +32575,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, gcc_assert (target == 0); if (memory) { - op = force_reg (Pmode, convert_to_mode (Pmode, op, 1)); + op = ix86_zero_extend_to_Pmode (op); target = gen_rtx_MEM (tmode, op); } else @@ -32520,7 +32620,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, if (i == memory) { /* This must be the memory operand. 
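Elsewhere in this chunk, the dispatcher-body change swaps a manually managed vec<tree> (create/release pair) for stack_vec<tree, 2>, which keeps its two-element storage on the stack and releases any heap overflow automatically at scope exit. A sketch of the idiom as the patch uses it, assuming only that stack_vec supports the usual safe_push interface (version1 and version2 are placeholder decls):

    {
      stack_vec<tree, 2> fn_ver_vec;  // was: vec<tree> v = vNULL; v.create (2);
      fn_ver_vec.safe_push (version1);
      fn_ver_vec.safe_push (version2);
      dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
    }                                 // no explicit release () needed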
*/ - op = force_reg (Pmode, convert_to_mode (Pmode, op, 1)); + op = ix86_zero_extend_to_Pmode (op); op = gen_rtx_MEM (mode, op); gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); @@ -32573,8 +32673,8 @@ get_element_number (tree vec_type, tree arg) { unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; - if (!host_integerp (arg, 1) - || (elt = tree_low_cst (arg, 1), elt > max)) + if (!tree_fits_uhwi_p (arg) + || (elt = tree_to_uhwi (arg), elt > max)) { error ("selector must be an integer constant in the range 0..%wi", max); return 0; @@ -32768,7 +32868,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, mode1 = insn_data[icode].operand[1].mode; mode2 = insn_data[icode].operand[2].mode; - op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); + op0 = ix86_zero_extend_to_Pmode (op0); op0 = gen_rtx_MEM (mode1, op0); if (!insn_data[icode].operand[0].predicate (op0, mode0)) @@ -32800,7 +32900,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op0 = expand_normal (arg0); icode = CODE_FOR_sse2_clflush; if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); + op0 = ix86_zero_extend_to_Pmode (op0); emit_insn (gen_sse2_clflush (op0)); return 0; @@ -32813,7 +32913,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = expand_normal (arg1); op2 = expand_normal (arg2); if (!REG_P (op0)) - op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); + op0 = ix86_zero_extend_to_Pmode (op0); if (!REG_P (op1)) op1 = copy_to_mode_reg (SImode, op1); if (!REG_P (op2)) @@ -33070,7 +33170,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op0 = expand_normal (arg0); icode = CODE_FOR_lwp_llwpcb; if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); + op0 = ix86_zero_extend_to_Pmode (op0); emit_insn (gen_lwp_llwpcb (op0)); return 0; @@ -33366,7 +33466,7 @@ addcarryx: /* Force memory operand only with base register here. But we don't want to do it on memory operand for other builtin functions. 
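The recurring substitution in these builtin-expansion hunks folds the open-coded zero extension of a pointer operand into the ix86_zero_extend_to_Pmode helper. Judging from the pattern of the replacements alone, the helper should be equivalent to roughly this (a sketch, not the verbatim definition):

    static rtx
    ix86_zero_extend_to_Pmode (rtx exp)
    {
      // Zero-extend EXP to the pointer mode and force it into a
      // register, exactly what every replaced call site spelled out.
      return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
    }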
*/ - op1 = force_reg (Pmode, convert_to_mode (Pmode, op1, 1)); + op1 = ix86_zero_extend_to_Pmode (op1); if (!insn_data[icode].operand[1].predicate (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); @@ -36471,7 +36571,7 @@ ix86_pad_returns (void) edge e; edge_iterator ei; - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) { basic_block bb = e->src; rtx ret = BB_END (bb); @@ -36571,14 +36671,14 @@ ix86_count_insn (basic_block bb) edge prev_e; edge_iterator prev_ei; - if (e->src == ENTRY_BLOCK_PTR) + if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) { min_prev_count = 0; break; } FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) { - if (prev_e->src == ENTRY_BLOCK_PTR) + if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) { int count = ix86_count_insn_bb (e->src); if (count < min_prev_count) @@ -36602,7 +36702,7 @@ ix86_pad_short_function (void) edge e; edge_iterator ei; - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) { rtx ret = BB_END (e->src); if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) @@ -36642,7 +36742,7 @@ ix86_seh_fixup_eh_fallthru (void) edge e; edge_iterator ei; - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) { rtx insn, next; @@ -43380,7 +43480,7 @@ do_dispatch (rtx insn, int mode) static bool has_dispatch (rtx insn, int action) { - if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3) + if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4) && flag_dispatch_scheduler) switch (action) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 123e3faed4b..2fd5fddbdc6 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -311,6 +311,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_BDVER1 (ix86_tune == PROCESSOR_BDVER1) #define TARGET_BDVER2 (ix86_tune == PROCESSOR_BDVER2) #define TARGET_BDVER3 (ix86_tune == PROCESSOR_BDVER3) +#define TARGET_BDVER4 (ix86_tune == PROCESSOR_BDVER4) #define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1) #define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2) #define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM) @@ -639,6 +640,7 @@ enum target_cpu_default TARGET_CPU_DEFAULT_bdver1, TARGET_CPU_DEFAULT_bdver2, TARGET_CPU_DEFAULT_bdver3, + TARGET_CPU_DEFAULT_bdver4, TARGET_CPU_DEFAULT_btver1, TARGET_CPU_DEFAULT_btver2, @@ -2247,6 +2249,7 @@ enum processor_type PROCESSOR_BDVER1, PROCESSOR_BDVER2, PROCESSOR_BDVER3, + PROCESSOR_BDVER4, PROCESSOR_BTVER1, PROCESSOR_BTVER2, PROCESSOR_ATOM, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a37fa64a03e..8178f9b9ab0 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -368,7 +368,8 @@ ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, - atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" + atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,bdver4, + btver1,btver2" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. 
Refinements due to arguments to be @@ -7978,7 +7979,18 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm") (and:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + "TARGET_64BIT + && ix86_match_ccmode + (insn, + /* If we are going to emit andl instead of andq, and the operands[2] + constant might have the SImode sign bit set, make sure the sign + flag isn't tested, because the instruction will set the sign flag + based on bit 31 rather than bit 63. If it isn't CONST_INT, + conservatively assume it might have bit 31 set. */ + (satisfies_constraint_Z (operands[2]) + && (!CONST_INT_P (operands[2]) + || val_signbit_known_set_p (SImode, INTVAL (operands[2])))) + ? CCZmode : CCNOmode) && ix86_binary_operator_ok (AND, DImode, operands)" "@ and{l}\t{%k2, %k0|%k0, %k2} @@ -15492,11 +15504,17 @@ (use (match_operand:SWI48 2 "nonmemory_operand")) (use (match_operand:SWI48 3 "const_int_operand")) (use (match_operand:SI 4 "const_int_operand")) - (use (match_operand:SI 5 "const_int_operand"))] + (use (match_operand:SI 5 "const_int_operand")) + (use (match_operand:SI 6 "")) + (use (match_operand:SI 7 "")) + (use (match_operand:SI 8 ""))] "" { - if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], - operands[4], operands[5])) + if (ix86_expand_set_or_movmem (operands[0], operands[1], + operands[2], NULL, operands[3], + operands[4], operands[5], + operands[6], operands[7], + operands[8], false)) DONE; else FAIL; @@ -15684,12 +15702,17 @@ (use (match_operand:QI 2 "nonmemory_operand")) (use (match_operand 3 "const_int_operand")) (use (match_operand:SI 4 "const_int_operand")) - (use (match_operand:SI 5 "const_int_operand"))] + (use (match_operand:SI 5 "const_int_operand")) + (use (match_operand:SI 6 "")) + (use (match_operand:SI 7 "")) + (use (match_operand:SI 8 ""))] "" { - if (ix86_expand_setmem (operands[0], operands[1], - operands[2], operands[3], - operands[4], operands[5])) + if (ix86_expand_set_or_movmem (operands[0], NULL, + operands[1], operands[2], + operands[3], operands[4], + operands[5], operands[6], + operands[7], operands[8], true)) DONE; else FAIL; diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 1a1b8abefdc..c4f9c8cb28e 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -444,7 +444,7 @@ the function. mdispatch-scheduler Target RejectNegative Var(flag_dispatch_scheduler) -Do dispatch scheduling if processor is bdver1 or bdver2 or bdver3 and Haifa scheduling +Do dispatch scheduling if processor is bdver1 or bdver2 or bdver3 or bdver4 and Haifa scheduling is selected. mprefer-avx128 diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index e5dd90cfad2..66ac52fd8c4 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -687,6 +687,16 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 3)"))) +;; Match 0 to 4. +(define_predicate "const_0_to_4_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 4)"))) + +;; Match 0 to 5. +(define_predicate "const_0_to_5_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 5)"))) + ;; Match 0 to 7. 
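The CCNOmode/CCZmode hunk in i386.md above encodes a subtle flags fact: when a 64-bit AND whose immediate fits the zero-extending 'Z' constraint is shrunk to andl, the sign flag afterwards reflects bit 31 of the 32-bit result rather than bit 63 of the full result, so only the zero flag remains trustworthy. A self-contained demonstration of the mismatch, in plain arithmetic with no GCC internals:

    #include <cstdint>
    #include <cstdio>

    int main ()
    {
      uint64_t a    = 0x00000000ffffffffULL;
      uint64_t mask = 0x0000000080000000ULL;  // zext immediate, bit 31 set
      uint64_t r64 = a & mask;                // what andq would produce
      uint32_t r32 = (uint32_t) r64;          // what andl actually computes
      // SF after andl would be 1 (bit 31 of r32), yet the 64-bit
      // result is non-negative: bit 63 of r64 is 0.
      std::printf ("bit31=%d bit63=%d\n",
                   (int) ((r32 >> 31) & 1), (int) (r64 >> 63));
      return 0;
    }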
(define_predicate "const_0_to_7_operand" (and (match_code "const_int") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7bb2d7795f6..6d6e16efcc8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -87,6 +87,7 @@ ;; For AVX512F support UNSPEC_VPERMI2 UNSPEC_VPERMT2 + UNSPEC_VPERMI2_MASK UNSPEC_UNSIGNED_FIX_NOTRUNC UNSPEC_UNSIGNED_PCMP UNSPEC_TESTM @@ -101,9 +102,15 @@ UNSPEC_GETMANT UNSPEC_ALIGN UNSPEC_CONFLICT + UNSPEC_COMPRESS + UNSPEC_COMPRESS_STORE + UNSPEC_EXPAND UNSPEC_MASKED_EQ UNSPEC_MASKED_GT + ;; For embed. rounding feature + UNSPEC_EMBEDDED_ROUNDING + ;; For AVX512PF support UNSPEC_GATHER_PREFETCH UNSPEC_SCATTER_PREFETCH @@ -551,6 +558,12 @@ (V8SF "7") (V4DF "3") (V4SF "3") (V2DF "1")]) +(define_mode_attr ssescalarsize + [(V8DI "64") (V4DI "64") (V2DI "64") + (V32HI "16") (V16HI "16") (V8HI "16") + (V16SI "32") (V8SI "32") (V4SI "32") + (V16SF "32") (V8DF "64")]) + ;; SSE prefix for integer vector modes (define_mode_attr sseintprefix [(V2DI "p") (V2DF "") @@ -607,6 +620,9 @@ (define_mode_attr bcstscalarsuff [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")]) +;; Include define_subst patterns for instructions with mask +(include "subst.md") + ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -746,6 +762,28 @@ ] (const_string "<sseinsnmode>")))]) +(define_insn "avx512f_load<mode>_mask" + [(set (match_operand:VI48F_512 0 "register_operand" "=v,v") + (vec_merge:VI48F_512 + (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m") + (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))] + "TARGET_AVX512F" +{ + switch (MODE_<sseinsnmode>) + { + case MODE_V8DF: + case MODE_V16SF: + return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; + default: + return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; + } +} + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "none,load") + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "avx512f_blendm<mode>" [(set (match_operand:VI48F_512 0 "register_operand" "=v") (vec_merge:VI48F_512 @@ -758,6 +796,28 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "avx512f_store<mode>_mask" + [(set (match_operand:VI48F_512 0 "memory_operand" "=m") + (vec_merge:VI48F_512 + (match_operand:VI48F_512 1 "register_operand" "v") + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))] + "TARGET_AVX512F" +{ + switch (MODE_<sseinsnmode>) + { + case MODE_V8DF: + case MODE_V16SF: + return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + default: + return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + } +} + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "sse2_movq128" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_concat:V2DI @@ -852,21 +912,21 @@ DONE; }) -(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>" +(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>" [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF [(match_operand:VF 1 "nonimmediate_operand" "vm")] UNSPEC_LOADU))] - "TARGET_SSE" + "TARGET_SSE && <mask_mode512bit_condition>" { switch (get_attr_mode (insn)) { case MODE_V16SF: case MODE_V8SF: case MODE_V4SF: - return "%vmovups\t{%1, %0|%0, %1}"; + return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; 
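The *_mask load and store patterns added in this region all share one shape: a vec_merge whose final operand is a mask register, selecting per element between the new value and the pass-through operand (the old destination for loads, the existing memory contents for stores). The same merge semantics are visible at the source level through the standard AVX-512F intrinsics, shown here purely as an illustration of what the patterns implement:

    #include <immintrin.h>

    __m512 masked_ops (__m512 src, __mmask16 k,
                       const float *p, float *q, __m512 a)
    {
      // Elements whose k-bit is set come from memory; the rest are
      // carried over unchanged from src (merge masking).
      __m512 r = _mm512_mask_loadu_ps (src, k, p);
      // Only elements whose k-bit is set are written to q.
      _mm512_mask_storeu_ps (q, k, a);
      return r;
    }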
default: - return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"; + return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; } } [(set_attr "type" "ssemov") @@ -913,12 +973,36 @@ ] (const_string "<MODE>")))]) -(define_insn "<sse2_avx_avx512f>_loaddqu<mode>" +(define_insn "avx512f_storeu<ssemodesuffix>512_mask" + [(set (match_operand:VF_512 0 "memory_operand" "=m") + (vec_merge:VF_512 + (unspec:VF_512 + [(match_operand:VF_512 1 "register_operand" "v")] + UNSPEC_STOREU) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))] + "TARGET_AVX512F" +{ + switch (get_attr_mode (insn)) + { + case MODE_V16SF: + return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + default: + return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + } +} + [(set_attr "type" "ssemov") + (set_attr "movu" "1") + (set_attr "memory" "store") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v") (unspec:VI_UNALIGNED_LOADSTORE [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")] UNSPEC_LOADU))] - "TARGET_SSE2" + "TARGET_SSE2 && <mask_mode512bit_condition>" { switch (get_attr_mode (insn)) { @@ -927,9 +1011,9 @@ return "%vmovups\t{%1, %0|%0, %1}"; case MODE_XI: if (<MODE>mode == V8DImode) - return "vmovdqu64\t{%1, %0|%0, %1}"; + return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; else - return "vmovdqu32\t{%1, %0|%0, %1}"; + return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; default: return "%vmovdqu\t{%1, %0|%0, %1}"; } @@ -992,6 +1076,27 @@ ] (const_string "<sseinsnmode>")))]) +(define_insn "avx512f_storedqu<mode>_mask" + [(set (match_operand:VI48_512 0 "memory_operand" "=m") + (vec_merge:VI48_512 + (unspec:VI48_512 + [(match_operand:VI48_512 1 "register_operand" "v")] + UNSPEC_STOREU) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))] + "TARGET_AVX512F" +{ + if (<MODE>mode == V8DImode) + return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + else + return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}"; +} + [(set_attr "type" "ssemov") + (set_attr "movu" "1") + (set_attr "memory" "store") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "<sse3>_lddqu<avxsizesuffix>" [(set (match_operand:VI1 0 "register_operand" "=x") (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] @@ -1119,26 +1224,26 @@ } [(set_attr "isa" "noavx,noavx,avx,avx")]) -(define_expand "<plusminus_insn><mode>3" +(define_expand "<plusminus_insn><mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand") (plusminus:VF (match_operand:VF 1 "nonimmediate_operand") (match_operand:VF 2 "nonimmediate_operand")))] - "TARGET_SSE" + "TARGET_SSE && <mask_mode512bit_condition>" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") -(define_insn "*<plusminus_insn><mode>3" +(define_insn "*<plusminus_insn><mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (plusminus:VF (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v") (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>" "@ <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} - v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, 
%0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vm<plusminus_insn><mode>3" @@ -1158,26 +1263,26 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<ssescalarmode>")]) -(define_expand "mul<mode>3" +(define_expand "mul<mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand") (mult:VF (match_operand:VF 1 "nonimmediate_operand") (match_operand:VF 2 "nonimmediate_operand")))] - "TARGET_SSE" + "TARGET_SSE && <mask_mode512bit_condition>" "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") -(define_insn "*mul<mode>3" +(define_insn "*mul<mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (mult:VF (match_operand:VF 1 "nonimmediate_operand" "%0,v") (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" + "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>" "@ mul<ssemodesuffix>\t{%2, %0|%0, %2} - vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + vmul<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssemul") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set_attr "btver2_decode" "direct,double") (set_attr "mode" "<MODE>")]) @@ -1195,7 +1300,7 @@ v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sse<multdiv_mnemonic>") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,vex") (set_attr "btver2_decode" "direct,double") (set_attr "mode" "<ssescalarmode>")]) @@ -1225,18 +1330,18 @@ } }) -(define_insn "<sse>_div<mode>3" +(define_insn "<sse>_div<mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (div:VF (match_operand:VF 1 "register_operand" "0,v") (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE" + "TARGET_SSE && <mask_mode512bit_condition>" "@ div<ssemodesuffix>\t{%2, %0|%0, %2} - vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + vdiv<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssediv") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set_attr "mode" "<MODE>")]) (define_insn "<sse>_rcp<mode>2" @@ -1269,18 +1374,18 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) -(define_insn "rcp14<mode>" +(define_insn "<mask_codefor>rcp14<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] UNSPEC_RCP14))] "TARGET_AVX512F" - "vrcp14<ssemodesuffix>\t{%1, %0|%0, %1}" + "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "srcp14<mode>" +(define_insn "*srcp14<mode>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 @@ -1316,11 +1421,11 @@ } }) -(define_insn "<sse>_sqrt<mode>2" +(define_insn "<sse>_sqrt<mode>2<mask_name>" [(set (match_operand:VF 0 "register_operand" "=v") (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))] - "TARGET_SSE" - "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}" + "TARGET_SSE && <mask_mode512bit_condition>" + "%vsqrt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") 
(set_attr "atom_sse_attr" "sqrt") (set_attr "btver2_sse_attr" "sqrt") @@ -1341,8 +1446,8 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") - (set_attr "btver2_sse_attr" "sqrt") (set_attr "prefix" "orig,vex") + (set_attr "btver2_sse_attr" "sqrt") (set_attr "mode" "<ssescalarmode>")]) (define_expand "rsqrt<mode>2" @@ -1365,18 +1470,18 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) -(define_insn "rsqrt14<mode>" +(define_insn "<mask_codefor>rsqrt14<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] UNSPEC_RSQRT14))] "TARGET_AVX512F" - "vrsqrt14<ssemodesuffix>\t{%1, %0|%0, %1}" + "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "rsqrt14<mode>" +(define_insn "*rsqrt14<mode>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 @@ -1411,47 +1516,49 @@ ;; isn't really correct, as those rtl operators aren't defined when ;; applied to NaNs. Hopefully the optimizers won't get too smart on us. -(define_expand "<code><mode>3" +(define_expand "<code><mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand") (smaxmin:VF (match_operand:VF 1 "nonimmediate_operand") (match_operand:VF 2 "nonimmediate_operand")))] - "TARGET_SSE" + "TARGET_SSE && <mask_mode512bit_condition>" { if (!flag_finite_math_only) operands[1] = force_reg (<MODE>mode, operands[1]); ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); }) -(define_insn "*<code><mode>3_finite" +(define_insn "*<code><mode>3_finite<mask_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (smaxmin:VF (match_operand:VF 1 "nonimmediate_operand" "%0,v") (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && flag_finite_math_only - && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && <mask_mode512bit_condition>" "@ <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} - v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "btver2_sse_attr" "maxmin") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set_attr "mode" "<MODE>")]) -(define_insn "*<code><mode>3" +(define_insn "*<code><mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (smaxmin:VF (match_operand:VF 1 "register_operand" "0,v") (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE && !flag_finite_math_only" + "TARGET_SSE && !flag_finite_math_only + && <mask_mode512bit_condition>" "@ <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} - v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "btver2_sse_attr" "maxmin") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vm<code><mode>3" @@ -2029,6 +2136,24 @@ (set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) +(define_insn "avx512f_vmcmp<mode>3_mask" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") + (and:<avx512fmaskmode> + (unspec:<avx512fmaskmode> + [(match_operand:VF_128 1 
"register_operand" "v") + (match_operand:VF_128 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_31_operand" "n")] + UNSPEC_PCMP) + (and:<avx512fmaskmode> + (match_operand:<avx512fmaskmode> 4 "register_operand" "k") + (const_int 1))))] + "TARGET_AVX512F" + "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0%{%4%}|%0%{%4%}, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<ssescalarmode>")]) + (define_insn "avx512f_maskcmp<mode>3" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator" @@ -2583,7 +2708,39 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "fma_fmsub_<mode>" +(define_insn "avx512f_fmadd_<mode>_mask" + [(set (match_operand:VF_512 0 "register_operand" "=v,v") + (vec_merge:VF_512 + (fma:VF_512 + (match_operand:VF_512 1 "register_operand" "0,0") + (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] + "TARGET_AVX512F" + "@ + vfmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "isa" "fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512f_fmadd_<mode>_mask3" + [(set (match_operand:VF_512 0 "register_operand" "=x") + (vec_merge:VF_512 + (fma:VF_512 + (match_operand:VF_512 1 "register_operand" "x") + (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 3 "register_operand" "0")) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vfmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "isa" "fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") @@ -2601,7 +2758,41 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "fma_fnmadd_<mode>" +(define_insn "avx512f_fmsub_<mode>_mask" + [(set (match_operand:VF_512 0 "register_operand" "=v,v") + (vec_merge:VF_512 + (fma:VF_512 + (match_operand:VF_512 1 "register_operand" "0,0") + (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (neg:VF_512 + (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] + "TARGET_AVX512F" + "@ + vfmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "isa" "fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512f_fmsub_<mode>_mask3" + [(set (match_operand:VF_512 0 "register_operand" "=v") + (vec_merge:VF_512 + (fma:VF_512 + (match_operand:VF_512 1 "register_operand" "v") + (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (neg:VF_512 + (match_operand:VF_512 3 "register_operand" "0"))) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vfmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "isa" "fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fnmadd_<mode>" [(set (match_operand:FMAMODE 
0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE @@ -2619,6 +2810,40 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "avx512f_fnmadd_<mode>_mask" + [(set (match_operand:VF_512 0 "register_operand" "=v,v") + (vec_merge:VF_512 + (fma:VF_512 + (neg:VF_512 + (match_operand:VF_512 1 "register_operand" "0,0")) + (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] + "TARGET_AVX512F" + "@ + vfnmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "isa" "fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512f_fnmadd_<mode>_mask3" + [(set (match_operand:VF_512 0 "register_operand" "=v") + (vec_merge:VF_512 + (fma:VF_512 + (neg:VF_512 + (match_operand:VF_512 1 "register_operand" "v")) + (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 3 "register_operand" "0")) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vfnmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "isa" "fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "*fma_fnmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE @@ -2638,6 +2863,42 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "avx512f_fnmsub_<mode>_mask" + [(set (match_operand:VF_512 0 "register_operand" "=v,v") + (vec_merge:VF_512 + (fma:VF_512 + (neg:VF_512 + (match_operand:VF_512 1 "register_operand" "0,0")) + (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (neg:VF_512 + (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] + "TARGET_AVX512F" + "@ + vfnmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "isa" "fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512f_fnmsub_<mode>_mask3" + [(set (match_operand:VF_512 0 "register_operand" "=v") + (vec_merge:VF_512 + (fma:VF_512 + (neg:VF_512 + (match_operand:VF_512 1 "register_operand" "v")) + (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (neg:VF_512 + (match_operand:VF_512 3 "register_operand" "0"))) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vfnmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "isa" "fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + ;; FMA parallel floating point multiply addsub and subadd operations. 
;; It would be possible to represent these without the UNSPEC as @@ -2676,6 +2937,40 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "avx512f_fmaddsub_<mode>_mask" + [(set (match_operand:VF_512 0 "register_operand" "=v,v") + (vec_merge:VF_512 + (unspec:VF_512 + [(match_operand:VF_512 1 "register_operand" "0,0") + (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")] + UNSPEC_FMADDSUB) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] + "TARGET_AVX512F" + "@ + vfmaddsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmaddsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "isa" "fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512f_fmaddsub_<mode>_mask3" + [(set (match_operand:VF_512 0 "register_operand" "=v") + (vec_merge:VF_512 + (unspec:VF_512 + [(match_operand:VF_512 1 "register_operand" "v") + (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 3 "register_operand" "0")] + UNSPEC_FMADDSUB) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vfmaddsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "isa" "fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "*fma_fmsubadd_<mode>" [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") (unspec:VF @@ -2695,6 +2990,42 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "avx512f_fmsubadd_<mode>_mask" + [(set (match_operand:VF_512 0 "register_operand" "=v,v") + (vec_merge:VF_512 + (unspec:VF_512 + [(match_operand:VF_512 1 "register_operand" "0,0") + (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (neg:VF_512 + (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))] + UNSPEC_FMADDSUB) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] + "TARGET_AVX512F" + "@ + vfmsubadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmsubadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "isa" "fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512f_fmsubadd_<mode>_mask3" + [(set (match_operand:VF_512 0 "register_operand" "=v") + (vec_merge:VF_512 + (unspec:VF_512 + [(match_operand:VF_512 1 "register_operand" "v") + (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (neg:VF_512 + (match_operand:VF_512 3 "register_operand" "0"))] + UNSPEC_FMADDSUB) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vfmsubadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "isa" "fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + ;; FMA3 floating point scalar intrinsics. These merge result with ;; high-order elements from the destination register. 
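The _mask and _mask3 FMA variants above differ only in which operand survives in the masked-off lanes: _mask merges with operand 1 (the 132/213 forms, where the destination is a multiplicand), _mask3 with operand 3 (the 231 form, where the destination is the addend). The corresponding AVX-512F intrinsics make the distinction visible; these are real intrinsics, used here only to illustrate the two merge choices:

    #include <immintrin.h>

    __m512 fma_mask_forms (__m512 a, __mmask16 k, __m512 b, __m512 c)
    {
      // vfmadd{132,213}ps with merge into a: masked-off lanes keep a.
      __m512 r1 = _mm512_mask_fmadd_ps (a, k, b, c);
      // vfmadd231ps with merge into c: masked-off lanes keep c.
      __m512 r2 = _mm512_mask3_fmadd_ps (a, b, c, k);
      return _mm512_add_ps (r1, r2);
    }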
@@ -3018,7 +3349,7 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (fix:DI (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "v,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,vm") (parallel [(const_int 0)]))))] "TARGET_SSE && TARGET_64BIT" "%vcvttss2si{q}\t{%1, %0|%0, %k1}" @@ -3058,22 +3389,22 @@ (set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "float<sseintvecmodelower><mode>2" +(define_insn "float<sseintvecmodelower><mode>2<mask_name>" [(set (match_operand:VF1 0 "register_operand" "=v") (float:VF1 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))] - "TARGET_SSE2" - "%vcvtdq2ps\t{%1, %0|%0, %1}" + "TARGET_SSE2 && <mask_mode512bit_condition>" + "%vcvtdq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "ufloatv16siv16sf2" +(define_insn "ufloatv16siv16sf2<mask_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") (unsigned_float:V16SF (match_operand:V16SI 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vcvtudq2ps\t{%1, %0|%0, %1}" + "vcvtudq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -3108,34 +3439,34 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_fix_notruncv16sfv16si" +(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (unspec:V16SI [(match_operand:V16SF 1 "nonimmediate_operand" "vm")] UNSPEC_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtps2dq\t{%1, %0|%0, %1}" + "vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "avx512f_ufix_notruncv16sfv16si" +(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (unspec:V16SI [(match_operand:V16SF 1 "nonimmediate_operand" "vm")] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtps2udq\t{%1, %0|%0, %1}" + "vcvtps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "<fixsuffix>fix_truncv16sfv16si2" +(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (any_fix:V16SI (match_operand:V16SF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vcvttps2<fixsuffix>dq\t{%1, %0|%0, %1}" + "vcvttps2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -3465,20 +3796,21 @@ (define_mode_attr si2dfmodelower [(V8DF "v8si") (V4DF "v4si")]) -(define_insn "float<si2dfmodelower><mode>2" +(define_insn "float<si2dfmodelower><mode>2<mask_name>" [(set (match_operand:VF2_512_256 0 "register_operand" "=v") (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX" - "vcvtdq2pd\t{%1, %0|%0, %1}" + "TARGET_AVX && <mask_mode512bit_condition>" + "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) -(define_insn "ufloatv8siv8df" +(define_insn "ufloatv8siv8df<mask_name>" [(set (match_operand:V8DF 0 "register_operand" "=v") - (unsigned_float:V8DF (match_operand:V8SI 1 "nonimmediate_operand" "vm")))] + (unsigned_float:V8DF + (match_operand:V8SI 1 
"nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vcvtudq2pd\t{%1, %0|%0, %1}" + "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "V8DF")]) @@ -3523,12 +3855,13 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "V2DF")]) -(define_insn "avx512f_cvtpd2dq512" +(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name>" [(set (match_operand:V8SI 0 "register_operand" "=v") - (unspec:V8SI [(match_operand:V8DF 1 "nonimmediate_operand" "vm")] - UNSPEC_FIX_NOTRUNC))] + (unspec:V8SI + [(match_operand:V8DF 1 "nonimmediate_operand" "vm")] + UNSPEC_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtpd2dq\t{%1, %0|%0, %1}" + "vcvtpd2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "OI")]) @@ -3596,22 +3929,23 @@ (set_attr "athlon_decode" "vector") (set_attr "bdver1_decode" "double")]) -(define_insn "avx512f_ufix_notruncv8dfv8si" +(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name>" [(set (match_operand:V8SI 0 "register_operand" "=v") (unspec:V8SI [(match_operand:V8DF 1 "nonimmediate_operand" "vm")] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtpd2udq\t{%1, %0|%0, %1}" + "vcvtpd2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "OI")]) -(define_insn "<fixsuffix>fix_truncv8dfv8si2" +(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name>" [(set (match_operand:V8SI 0 "register_operand" "=v") - (any_fix:V8SI (match_operand:V8DF 1 "nonimmediate_operand" "vm")))] + (any_fix:V8SI + (match_operand:V8DF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vcvttpd2<fixsuffix>dq\t{%1, %0|%0, %1}" + "vcvttpd2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "OI")]) @@ -3717,12 +4051,12 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "DF")]) -(define_insn "avx512f_cvtpd2ps512" +(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>" [(set (match_operand:V8SF 0 "register_operand" "=v") (float_truncate:V8SF (match_operand:V8DF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vcvtpd2ps\t{%1, %0|%0, %1}" + "vcvtpd2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "V8SF")]) @@ -3772,12 +4106,12 @@ (define_mode_attr sf2dfmode [(V8DF "V8SF") (V4DF "V4SF")]) -(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix>" +(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name>" [(set (match_operand:VF2_512_256 0 "register_operand" "=v") (float_extend:VF2_512_256 (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX" - "vcvtps2pd\t{%1, %0|%0, %1}" + "TARGET_AVX && <mask_mode512bit_condition>" + "vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) @@ -4126,6 +4460,30 @@ DONE; }) +(define_expand "vec_unpacku_float_lo_v16si" + [(match_operand:V8DF 0 "register_operand") + (match_operand:V16SI 1 "nonimmediate_operand")] + "TARGET_AVX512F" +{ + REAL_VALUE_TYPE TWO32r; + rtx k, x, tmp[3]; + + real_ldexp (&TWO32r, &dconst1, 32); + x = const_double_from_real_value (TWO32r, DFmode); + + tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode)); + tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x)); + tmp[2] = gen_reg_rtx (V8DFmode); + k = gen_reg_rtx (QImode); + + emit_insn 
(gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1])); + emit_insn (gen_rtx_SET (VOIDmode, k, + gen_rtx_LT (QImode, tmp[2], tmp[0]))); + emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); + emit_move_insn (operands[0], tmp[2]); + DONE; +}) + (define_expand "vec_pack_trunc_<mode>" [(set (match_dup 3) (float_truncate:<sf2dfmode> @@ -4415,7 +4773,7 @@ (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) -(define_insn "avx512f_unpckhps512" +(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") (vec_select:V16SF (vec_concat:V32SF @@ -4430,7 +4788,7 @@ (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] "TARGET_AVX512F" - "vunpckhps\t{%2, %1, %0|%0, %1, %2}" + "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -4503,7 +4861,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "V4SF")]) -(define_insn "avx512f_unpcklps512" +(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") (vec_select:V16SF (vec_concat:V32SF @@ -4518,7 +4876,7 @@ (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))] "TARGET_AVX512F" - "vunpcklps\t{%2, %1, %0|%0, %1, %2}" + "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -4626,7 +4984,7 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) -(define_insn "avx512f_movshdup512" +(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") (vec_select:V16SF (vec_concat:V32SF @@ -4641,7 +4999,7 @@ (const_int 13) (const_int 13) (const_int 15) (const_int 15)])))] "TARGET_AVX512F" - "vmovshdup\t{%1, %0|%0, %1}" + "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -4679,7 +5037,7 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) -(define_insn "avx512f_movsldup512" +(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") (vec_select:V16SF (vec_concat:V32SF @@ -4694,7 +5052,7 @@ (const_int 12) (const_int 12) (const_int 14) (const_int 14)])))] "TARGET_AVX512F" - "vmovsldup\t{%1, %0|%0, %1}" + "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -5228,8 +5586,71 @@ operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4); }) -(define_insn "avx512f_vextract<shuffletype>32x4_1" - [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm") +(define_expand "avx512f_vextract<shuffletype>32x4_mask" + [(match_operand:<ssequartermode> 0 "nonimmediate_operand") + (match_operand:V16FI 1 "register_operand") + (match_operand:SI 2 "const_0_to_3_operand") + (match_operand:<ssequartermode> 3 "nonimmediate_operand") + (match_operand:QI 4 "register_operand")] + "TARGET_AVX512F" +{ + if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR) + operands[0] = force_reg (<ssequartermode>mode, operands[0]); + switch (INTVAL (operands[2])) + { + case 0: + emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0], + operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2), + GEN_INT (3), operands[3], operands[4])); + break; + case 1: + emit_insn 
(gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+	  operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
+	  GEN_INT (7), operands[3], operands[4]));
+      break;
+    case 2:
+      emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+	  operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
+	  GEN_INT (11), operands[3], operands[4]));
+      break;
+    case 3:
+      emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+	  operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
+	  GEN_INT (15), operands[3], operands[4]));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  DONE;
+})
+
+(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
+  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
+	(vec_merge:<ssequartermode>
+	  (vec_select:<ssequartermode>
+	    (match_operand:V16FI 1 "register_operand" "v")
+	    (parallel [(match_operand 2 "const_0_to_15_operand")
+	      (match_operand 3 "const_0_to_15_operand")
+	      (match_operand 4 "const_0_to_15_operand")
+	      (match_operand 5 "const_0_to_15_operand")]))
+	  (match_operand:<ssequartermode> 6 "memory_operand" "0")
+	  (match_operand:QI 7 "register_operand" "k")))]
+  "TARGET_AVX512F && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
+   && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
+   && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
+{
+  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
+  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
+  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
 	(vec_select:<ssequartermode>
 	  (match_operand:V16FI 1 "register_operand" "v")
 	  (parallel [(match_operand 2 "const_0_to_15_operand")
@@ -5241,7 +5662,7 @@
   && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
 {
   operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
-  return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
+  return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
 }
   [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
@@ -5253,6 +5674,35 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "avx512f_vextract<shuffletype>64x4_mask"
+  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+   (match_operand:V8FI 1 "register_operand")
+   (match_operand:SI 2 "const_0_to_1_operand")
+   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
+   (match_operand:QI 4 "register_operand")]
+  "TARGET_AVX512F"
+{
+  rtx (*insn)(rtx, rtx, rtx, rtx);
+
+  if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
+    operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
+
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      insn = gen_vec_extract_lo_<mode>_mask;
+      break;
+    case 1:
+      insn = gen_vec_extract_hi_<mode>_mask;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
+  DONE;
+})
+
 (define_split
   [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
 	(vec_select:<ssehalfvecmode>
@@ -5272,14 +5722,36 @@
   DONE;
 })
 
-(define_insn "vec_extract_lo_<mode>"
-  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
+(define_insn "vec_extract_lo_<mode>_maskm"
+  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+	(vec_merge:<ssehalfvecmode> +
(vec_select:<ssehalfvecmode> + (match_operand:V8FI 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") + (match_operand:QI 3 "register_operand" "k")))] + "TARGET_AVX512F" +"vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "vec_extract_lo_<mode><mask_name>" + [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") (vec_select:<ssehalfvecmode> (match_operand:V8FI 1 "nonimmediate_operand" "vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "#" +{ + if (<mask_applied>) + return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; + else + return "#"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -5290,14 +5762,32 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "vec_extract_hi_<mode>" - [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm") +(define_insn "vec_extract_hi_<mode>_maskm" + [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") + (vec_merge:<ssehalfvecmode> + (vec_select:<ssehalfvecmode> + (match_operand:V8FI 1 "register_operand" "v") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") + (match_operand:QI 3 "register_operand" "k")))] + "TARGET_AVX512F" + "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "memory" "store") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "vec_extract_hi_<mode><mask_name>" + [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") (vec_select:<ssehalfvecmode> (match_operand:V8FI 1 "register_operand" "v") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] "TARGET_AVX512F" - "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}" + "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -5643,7 +6133,7 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "avx512f_unpckhpd512" +(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>" [(set (match_operand:V8DF 0 "register_operand" "=v") (vec_select:V8DF (vec_concat:V16DF @@ -5654,7 +6144,7 @@ (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))] "TARGET_AVX512F" - "vunpckhpd\t{%2, %1, %0|%0, %1, %2}" + "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "V8DF")]) @@ -5739,7 +6229,7 @@ (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) -(define_expand "avx512f_movddup512" +(define_expand "avx512f_movddup512<mask_name>" [(set (match_operand:V8DF 0 "register_operand") (vec_select:V8DF (vec_concat:V16DF @@ -5751,7 +6241,7 @@ (const_int 6) (const_int 14)])))] "TARGET_AVX512F") -(define_expand "avx512f_unpcklpd512" +(define_expand 
"avx512f_unpcklpd512<mask_name>" [(set (match_operand:V8DF 0 "register_operand") (vec_select:V8DF (vec_concat:V16DF @@ -5763,7 +6253,7 @@ (const_int 6) (const_int 14)])))] "TARGET_AVX512F") -(define_insn "*avx512f_unpcklpd512" +(define_insn "*avx512f_unpcklpd512<mask_name>" [(set (match_operand:V8DF 0 "register_operand" "=v,v") (vec_select:V8DF (vec_concat:V16DF @@ -5775,8 +6265,8 @@ (const_int 6) (const_int 14)])))] "TARGET_AVX512F" "@ - vmovddup\t{%1, %0|%0, %1} - vunpcklpd\t{%2, %1, %0|%0, %1, %2}" + vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1} + vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "V8DF")]) @@ -5913,12 +6403,13 @@ operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8); }) -(define_insn "avx512f_vmscalef<mode>" +(define_insn "*avx512f_vmscalef<mode>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 - (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "nonimmediate_operand" "vm")] - UNSPEC_SCALEF) + (unspec:VF_128 + [(match_operand:VF_128 1 "register_operand" "v") + (match_operand:VF_128 2 "nonimmediate_operand" "vm")] + UNSPEC_SCALEF) (match_dup 1) (const_int 1)))] "TARGET_AVX512F" @@ -5926,13 +6417,14 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "avx512f_scalef<mode>" +(define_insn "avx512f_scalef<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") - (unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "v") - (match_operand:VF_512 2 "nonimmediate_operand" "vm")] - UNSPEC_SCALEF))] + (unspec:VF_512 + [(match_operand:VF_512 1 "register_operand" "v") + (match_operand:VF_512 2 "nonimmediate_operand" "vm")] + UNSPEC_SCALEF))] "TARGET_AVX512F" - "%vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "%vscalef<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -5950,21 +6442,39 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_getexp<mode>" +(define_insn "avx512f_vternlog<mode>_mask" + [(set (match_operand:VI48_512 0 "register_operand" "=v") + (vec_merge:VI48_512 + (unspec:VI48_512 + [(match_operand:VI48_512 1 "register_operand" "0") + (match_operand:VI48_512 2 "register_operand" "v") + (match_operand:VI48_512 3 "nonimmediate_operand" "vm") + (match_operand:SI 4 "const_0_to_255_operand")] + UNSPEC_VTERNLOG) + (match_dup 1) + (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))] + "TARGET_AVX512F" + "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}" + [(set_attr "type" "sselog") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512f_getexp<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] UNSPEC_GETEXP))] "TARGET_AVX512F" - "vgetexp<ssemodesuffix>\t{%1, %0|%0, %1}"; + "vgetexp<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) (define_insn "avx512f_sgetexp<mode>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 - (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "nonimmediate_operand" "vm")] - UNSPEC_GETEXP) + (unspec:VF_128 + [(match_operand:VF_128 1 "register_operand" "v") + (match_operand:VF_128 2 "nonimmediate_operand" "vm")] + 
UNSPEC_GETEXP) (match_dup 1) (const_int 1)))] "TARGET_AVX512F" @@ -5972,17 +6482,48 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "avx512f_align<mode>" +(define_insn "<mask_codefor>avx512f_align<mode><mask_name>" [(set (match_operand:VI48_512 0 "register_operand" "=v") (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v") (match_operand:VI48_512 2 "nonimmediate_operand" "vm") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_ALIGN))] "TARGET_AVX512F" - "valign<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; [(set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_expand "avx512f_shufps512_mask" + [(match_operand:V16SF 0 "register_operand") + (match_operand:V16SF 1 "register_operand") + (match_operand:V16SF 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_255_operand") + (match_operand:V16SF 4 "register_operand") + (match_operand:HI 5 "register_operand")] + "TARGET_AVX512F" +{ + int mask = INTVAL (operands[3]); + emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT (((mask >> 4) & 3) + 16), + GEN_INT (((mask >> 6) & 3) + 16), + GEN_INT (((mask >> 0) & 3) + 4), + GEN_INT (((mask >> 2) & 3) + 4), + GEN_INT (((mask >> 4) & 3) + 20), + GEN_INT (((mask >> 6) & 3) + 20), + GEN_INT (((mask >> 0) & 3) + 8), + GEN_INT (((mask >> 2) & 3) + 8), + GEN_INT (((mask >> 4) & 3) + 24), + GEN_INT (((mask >> 6) & 3) + 24), + GEN_INT (((mask >> 0) & 3) + 12), + GEN_INT (((mask >> 2) & 3) + 12), + GEN_INT (((mask >> 4) & 3) + 28), + GEN_INT (((mask >> 6) & 3) + 28), + operands[4], operands[5])); + DONE; +}) + (define_insn "avx512f_fixupimm<mode>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 @@ -5996,6 +6537,22 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) +(define_insn "avx512f_fixupimm<mode>_mask" + [(set (match_operand:VF_512 0 "register_operand" "=v") + (vec_merge:VF_512 + (unspec:VF_512 + [(match_operand:VF_512 1 "register_operand" "0") + (match_operand:VF_512 2 "register_operand" "v") + (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm") + (match_operand:SI 4 "const_0_to_255_operand")] + UNSPEC_FIXUPIMM) + (match_dup 1) + (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))] + "TARGET_AVX512F" + "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"; + [(set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + (define_insn "avx512f_sfixupimm<mode>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 @@ -6012,19 +6569,38 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "avx512f_rndscale<mode>" +(define_insn "avx512f_sfixupimm<mode>_mask" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 1 "register_operand" "0") + (match_operand:VF_128 2 "register_operand" "v") + (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm") + (match_operand:SI 4 "const_0_to_255_operand")] + UNSPEC_FIXUPIMM) + (match_dup 1) + (const_int 1)) + (match_dup 1) + (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))] + "TARGET_AVX512F" + "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"; + [(set_attr "prefix" "evex") + (set_attr "mode" "<ssescalarmode>")]) + +(define_insn "avx512f_rndscale<mode><mask_name>" [(set 
(match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_ROUND))] "TARGET_AVX512F" - "vrndscale<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "vrndscale<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_rndscale<mode>" +(define_insn "*avx512f_rndscale<mode>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 @@ -6041,7 +6617,7 @@ (set_attr "mode" "<MODE>")]) ;; One bit in mask selects 2 elements. -(define_insn "avx512f_shufps512_1" +(define_insn "avx512f_shufps512_1<mask_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") (vec_select:V16SF (vec_concat:V32SF @@ -6084,14 +6660,37 @@ mask |= (INTVAL (operands[6]) - 16) << 6; operands[3] = GEN_INT (mask); - return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}"; } [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) -(define_insn "avx512f_shufpd512_1" +(define_expand "avx512f_shufpd512_mask" + [(match_operand:V8DF 0 "register_operand") + (match_operand:V8DF 1 "register_operand") + (match_operand:V8DF 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_255_operand") + (match_operand:V8DF 4 "register_operand") + (match_operand:QI 5 "register_operand")] + "TARGET_AVX512F" +{ + int mask = INTVAL (operands[3]); + emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2], + GEN_INT (mask & 1), + GEN_INT (mask & 2 ? 9 : 8), + GEN_INT (mask & 4 ? 3 : 2), + GEN_INT (mask & 8 ? 11 : 10), + GEN_INT (mask & 16 ? 5 : 4), + GEN_INT (mask & 32 ? 13 : 12), + GEN_INT (mask & 64 ? 7 : 6), + GEN_INT (mask & 128 ? 
15 : 14), + operands[4], operands[5])); + DONE; +}) + +(define_insn "avx512f_shufpd512_1<mask_name>" [(set (match_operand:V8DF 0 "register_operand" "=v") (vec_select:V8DF (vec_concat:V16DF @@ -6118,7 +6717,7 @@ mask |= (INTVAL (operands[10]) - 14) << 7; operands[3] = GEN_INT (mask); - return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; } [(set_attr "type" "sselog") (set_attr "length_immediate" "1") @@ -6198,7 +6797,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "avx512f_interleave_highv8di" +(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>" [(set (match_operand:V8DI 0 "register_operand" "=v") (vec_select:V8DI (vec_concat:V16DI @@ -6209,7 +6808,7 @@ (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))] "TARGET_AVX512F" - "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" + "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -6248,7 +6847,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "avx512f_interleave_lowv8di" +(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>" [(set (match_operand:V8DI 0 "register_operand" "=v") (vec_select:V8DI (vec_concat:V16DI @@ -6259,7 +6858,7 @@ (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))] "TARGET_AVX512F" - "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" + "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -6630,6 +7229,20 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask" + [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m") + (vec_merge:PMOV_DST_MODE + (any_truncate:PMOV_DST_MODE + (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")) + (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0") + (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))] + "TARGET_AVX512F" + "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") + (set_attr "memory" "none,store") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "*avx512f_<code>v8div16qi2" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI @@ -6663,6 +7276,55 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn "avx512f_<code>v8div16qi2_mask" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_concat:V16QI + (vec_merge:V8QI + (any_truncate:V8QI + (match_operand:V8DI 1 "register_operand" "v")) + (vec_select:V8QI + (match_operand:V16QI 2 "vector_move_operand" "0C") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (match_operand:QI 3 "register_operand" "k")) + (const_vector:V8QI [(const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0)])))] + "TARGET_AVX512F" + "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*avx512f_<code>v8div16qi2_store_mask" + [(set (match_operand:V16QI 0 "memory_operand" "=m") + (vec_concat:V16QI + (vec_merge:V8QI + (any_truncate:V8QI + (match_operand:V8DI 1 "register_operand" "v")) + (vec_select:V8QI + (match_dup 0) + (parallel [(const_int 0) (const_int 
1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (match_operand:QI 2 "register_operand" "k")) + (vec_select:V8QI + (match_dup 0) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] + "TARGET_AVX512F" + "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}" + [(set_attr "type" "ssemov") + (set_attr "memory" "store") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral arithmetic @@ -6677,27 +7339,27 @@ "TARGET_SSE2" "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") -(define_expand "<plusminus_insn><mode>3" +(define_expand "<plusminus_insn><mode>3<mask_name>" [(set (match_operand:VI_AVX2 0 "register_operand") (plusminus:VI_AVX2 (match_operand:VI_AVX2 1 "nonimmediate_operand") (match_operand:VI_AVX2 2 "nonimmediate_operand")))] - "TARGET_SSE2" + "TARGET_SSE2 && <mask_mode512bit_condition>" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") -(define_insn "*<plusminus_insn><mode>3" +(define_insn "*<plusminus_insn><mode>3<mask_name>" [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v") (plusminus:VI_AVX2 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v") (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} - vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set_attr "mode" "<sseinsnmode>")]) (define_expand "<sse2_avx2>_<plusminus_insn><mode>3" @@ -6787,7 +7449,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "vec_widen_umult_even_v16si" +(define_expand "vec_widen_umult_even_v16si<mask_name>" [(set (match_operand:V8DI 0 "register_operand") (mult:V8DI (zero_extend:V8DI @@ -6807,7 +7469,7 @@ "TARGET_AVX512F" "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") -(define_insn "*vec_widen_umult_even_v16si" +(define_insn "*vec_widen_umult_even_v16si<mask_name>" [(set (match_operand:V8DI 0 "register_operand" "=v") (mult:V8DI (zero_extend:V8DI @@ -6825,7 +7487,7 @@ (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" - "vpmuludq\t{%2, %1, %0|%0, %1, %2}" + "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "avx512f") (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") @@ -6902,7 +7564,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_expand "vec_widen_smult_even_v16si" +(define_expand "vec_widen_smult_even_v16si<mask_name>" [(set (match_operand:V8DI 0 "register_operand") (mult:V8DI (sign_extend:V8DI @@ -6922,7 +7584,7 @@ "TARGET_AVX512F" "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") -(define_insn "*vec_widen_smult_even_v16si" +(define_insn "*vec_widen_smult_even_v16si<mask_name>" [(set (match_operand:V8DI 0 "register_operand" "=v") (mult:V8DI (sign_extend:V8DI @@ -6940,7 +7602,7 @@ (const_int 8) 
(const_int 10) (const_int 12) (const_int 14)])))))] "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" - "vpmuldq\t{%2, %1, %0|%0, %1, %2}" + "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "avx512f") (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") @@ -7150,12 +7812,12 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_expand "mul<mode>3" +(define_expand "mul<mode>3<mask_name>" [(set (match_operand:VI4_AVX512F 0 "register_operand") (mult:VI4_AVX512F (match_operand:VI4_AVX512F 1 "general_vector_operand") (match_operand:VI4_AVX512F 2 "general_vector_operand")))] - "TARGET_SSE2" + "TARGET_SSE2 && <mask_mode512bit_condition>" { if (TARGET_SSE4_1) { @@ -7172,19 +7834,19 @@ } }) -(define_insn "*<sse4_1_avx2>_mul<mode>3" +(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>" [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v") (mult:VI4_AVX512F (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v") (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" + "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>" "@ pmulld\t{%2, %0|%0, %2} - vpmulld\t{%2, %1, %0|%0, %1, %2}" + vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set_attr "btver2_decode" "vector,vector") (set_attr "mode" "<sseinsnmode>")]) @@ -7297,6 +7959,20 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "ashr<mode>3<mask_name>" + [(set (match_operand:VI48_512 0 "register_operand" "=v,v") + (ashiftrt:VI48_512 + (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm") + (match_operand:SI 2 "nonmemory_operand" "v,N")))] + "TARGET_AVX512F && <mask_mode512bit_condition>" + "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + [(set_attr "type" "sseishft") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "<shift_insn><mode>3" [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") (any_lshift:VI248_AVX2 @@ -7316,13 +7992,13 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<shift_insn><mode>3" +(define_insn "<shift_insn><mode>3<mask_name>" [(set (match_operand:VI48_512 0 "register_operand" "=v,v") (any_lshift:VI48_512 (match_operand:VI48_512 1 "register_operand" "v,m") (match_operand:SI 2 "nonmemory_operand" "vN,N")))] - "TARGET_AVX512F" - "vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_AVX512F && <mask_mode512bit_condition>" + "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "isa" "avx512f") (set_attr "type" "sseishft") (set (attr "length_immediate") @@ -7332,6 +8008,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) + (define_expand "vec_shl_<mode>" [(set (match_dup 3) (ashift:V1TI @@ -7411,41 +8088,42 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_<rotate>v<mode>" +(define_insn "avx512f_<rotate>v<mode><mask_name>" [(set (match_operand:VI48_512 0 "register_operand" "=v") (any_rotate:VI48_512 (match_operand:VI48_512 1 "register_operand" "v") (match_operand:VI48_512 2 "nonimmediate_operand" 
"vm")))] "TARGET_AVX512F" - "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_<rotate><mode>" +(define_insn "avx512f_<rotate><mode><mask_name>" [(set (match_operand:VI48_512 0 "register_operand" "=v") (any_rotate:VI48_512 (match_operand:VI48_512 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")))] "TARGET_AVX512F" - "vp<rotate><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "<code><mode>3" +(define_expand "<code><mode>3<mask_name>" [(set (match_operand:VI124_256_48_512 0 "register_operand") (maxmin:VI124_256_48_512 (match_operand:VI124_256_48_512 1 "nonimmediate_operand") (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))] - "TARGET_AVX2" + "TARGET_AVX2 && <mask_mode512bit_condition>" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") -(define_insn "*avx2_<code><mode>3" +(define_insn "*avx2_<code><mode>3<mask_name>" [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v") (maxmin:VI124_256_48_512 (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v") (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))] - "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && <mask_mode512bit_condition>" + "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") @@ -7981,19 +8659,19 @@ operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); }) -(define_expand "<sse2_avx2>_andnot<mode>3" +(define_expand "<sse2_avx2>_andnot<mode>3<mask_name>" [(set (match_operand:VI_AVX2 0 "register_operand") (and:VI_AVX2 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand")) (match_operand:VI_AVX2 2 "nonimmediate_operand")))] - "TARGET_SSE2") + "TARGET_SSE2 && <mask_mode512bit_condition>") -(define_insn "*andnot<mode>3" +(define_insn "*andnot<mode>3<mask_name>" [(set (match_operand:VI 0 "register_operand" "=x,v") (and:VI (not:VI (match_operand:VI 1 "register_operand" "0,v")) (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE" + "TARGET_SSE && <mask_mode512bit_condition>" { static char buf[64]; const char *ops; @@ -8033,7 +8711,7 @@ ops = "%s\t{%%2, %%0|%%0, %%2}"; break; case 1: - ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; break; default: gcc_unreachable (); @@ -8050,7 +8728,7 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set (attr "mode") (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "<ssePSmode>") @@ -8078,12 +8756,12 @@ DONE; }) -(define_insn "*<code><mode>3" +(define_insn "<mask_codefor><code><mode>3<mask_name>" [(set (match_operand:VI 0 "register_operand" "=x,v") (any_logic:VI (match_operand:VI 1 "nonimmediate_operand" "%0,v") (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE + "TARGET_SSE && <mask_mode512bit_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { 
static char buf[64]; @@ -8125,7 +8803,7 @@ ops = "%s\t{%%2, %%0|%%0, %%2}"; break; case 1: - ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; break; default: gcc_unreachable (); @@ -8142,7 +8820,7 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "<mask_prefix3>") (set (attr "mode") (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "<ssePSmode>") @@ -8450,7 +9128,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "avx512f_interleave_highv16si" +(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (vec_select:V16SI (vec_concat:V32SI @@ -8465,7 +9143,7 @@ (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] "TARGET_AVX512F" - "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" + "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -8505,7 +9183,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "avx512f_interleave_lowv16si" +(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (vec_select:V16SI (vec_concat:V32SI @@ -8520,7 +9198,7 @@ (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))] "TARGET_AVX512F" - "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" + "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -8645,7 +9323,45 @@ (set_attr "prefix" "orig,orig,vex,vex") (set_attr "mode" "TI")]) -(define_insn "avx512f_vinsert<shuffletype>32x4_1" +(define_expand "avx512f_vinsert<shuffletype>32x4_mask" + [(match_operand:V16FI 0 "register_operand") + (match_operand:V16FI 1 "register_operand") + (match_operand:<ssequartermode> 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_3_operand") + (match_operand:V16FI 4 "register_operand") + (match_operand:<avx512fmaskmode> 5 "register_operand")] + "TARGET_AVX512F" +{ + switch (INTVAL (operands[3])) + { + case 0: + emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0], + operands[1], operands[2], GEN_INT (0xFFF), operands[4], + operands[5])); + break; + case 1: + emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0], + operands[1], operands[2], GEN_INT (0xF0FF), operands[4], + operands[5])); + break; + case 2: + emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0], + operands[1], operands[2], GEN_INT (0xFF0F), operands[4], + operands[5])); + break; + case 3: + emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0], + operands[1], operands[2], GEN_INT (0xFFF0), operands[4], + operands[5])); + break; + default: + gcc_unreachable (); + } + DONE; + +}) + +(define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>" [(set (match_operand:V16FI 0 "register_operand" "=v") (vec_merge:V16FI (match_operand:V16FI 1 "register_operand" "v") @@ -8668,14 +9384,35 @@ operands[3] = GEN_INT (mask); - return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; } [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "vec_set_lo_<mode>" +(define_expand 
"avx512f_vinsert<shuffletype>64x4_mask" + [(match_operand:V8FI 0 "register_operand") + (match_operand:V8FI 1 "register_operand") + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_1_operand") + (match_operand:V8FI 4 "register_operand") + (match_operand:<avx512fmaskmode> 5 "register_operand")] + "TARGET_AVX512F" +{ + int mask = INTVAL (operands[3]); + if (mask == 0) + emit_insn (gen_vec_set_lo_<mode>_mask + (operands[0], operands[1], operands[2], + operands[4], operands[5])); + else + emit_insn (gen_vec_set_hi_<mode>_mask + (operands[0], operands[1], operands[2], + operands[4], operands[5])); + DONE; +}) + +(define_insn "vec_set_lo_<mode><mask_name>" [(set (match_operand:V8FI 0 "register_operand" "=v") (vec_concat:V8FI (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") @@ -8684,13 +9421,13 @@ (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_AVX512F" - "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0|%0, %1, %2, $0x0}" + "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}" [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "vec_set_hi_<mode>" +(define_insn "vec_set_hi_<mode><mask_name>" [(set (match_operand:V8FI 0 "register_operand" "=v") (vec_concat:V8FI (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") @@ -8699,13 +9436,37 @@ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_AVX512F" - "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0|%0, %1, %2, $0x1}" + "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}" [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "avx512f_shuf_<shuffletype>64x2_1" +(define_expand "avx512f_shuf_<shuffletype>64x2_mask" + [(match_operand:V8FI 0 "register_operand") + (match_operand:V8FI 1 "register_operand") + (match_operand:V8FI 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_255_operand") + (match_operand:V8FI 4 "register_operand") + (match_operand:QI 5 "register_operand")] + "TARGET_AVX512F" +{ + int mask = INTVAL (operands[3]); + emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask + (operands[0], operands[1], operands[2], + GEN_INT (((mask >> 0) & 3) * 2), + GEN_INT (((mask >> 0) & 3) * 2 + 1), + GEN_INT (((mask >> 2) & 3) * 2), + GEN_INT (((mask >> 2) & 3) * 2 + 1), + GEN_INT (((mask >> 4) & 3) * 2 + 8), + GEN_INT (((mask >> 4) & 3) * 2 + 9), + GEN_INT (((mask >> 6) & 3) * 2 + 8), + GEN_INT (((mask >> 6) & 3) * 2 + 9), + operands[4], operands[5])); + DONE; +}) + +(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>" [(set (match_operand:V8FI 0 "register_operand" "=v") (vec_select:V8FI (vec_concat:<ssedoublemode> @@ -8732,14 +9493,46 @@ mask |= (INTVAL (operands[9]) - 8) / 2 << 6; operands[3] = GEN_INT (mask); - return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; } [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_shuf_<shuffletype>32x4_1" +(define_expand "avx512f_shuf_<shuffletype>32x4_mask" + [(match_operand:V16FI 0 "register_operand") + (match_operand:V16FI 1 "register_operand") + (match_operand:V16FI 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_255_operand") + 
(match_operand:V16FI 4 "register_operand") + (match_operand:HI 5 "register_operand")] + "TARGET_AVX512F" +{ + int mask = INTVAL (operands[3]); + emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask + (operands[0], operands[1], operands[2], + GEN_INT (((mask >> 0) & 3) * 4), + GEN_INT (((mask >> 0) & 3) * 4 + 1), + GEN_INT (((mask >> 0) & 3) * 4 + 2), + GEN_INT (((mask >> 0) & 3) * 4 + 3), + GEN_INT (((mask >> 2) & 3) * 4), + GEN_INT (((mask >> 2) & 3) * 4 + 1), + GEN_INT (((mask >> 2) & 3) * 4 + 2), + GEN_INT (((mask >> 2) & 3) * 4 + 3), + GEN_INT (((mask >> 4) & 3) * 4 + 16), + GEN_INT (((mask >> 4) & 3) * 4 + 17), + GEN_INT (((mask >> 4) & 3) * 4 + 18), + GEN_INT (((mask >> 4) & 3) * 4 + 19), + GEN_INT (((mask >> 6) & 3) * 4 + 16), + GEN_INT (((mask >> 6) & 3) * 4 + 17), + GEN_INT (((mask >> 6) & 3) * 4 + 18), + GEN_INT (((mask >> 6) & 3) * 4 + 19), + operands[4], operands[5])); + DONE; +}) + +(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>" [(set (match_operand:V16FI 0 "register_operand" "=v") (vec_select:V16FI (vec_concat:<ssedoublemode> @@ -8782,14 +9575,44 @@ mask |= (INTVAL (operands[15]) - 16) / 4 << 6; operands[3] = GEN_INT (mask); - return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}"; } [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_pshufd_1" +(define_expand "avx512f_pshufdv3_mask" + [(match_operand:V16SI 0 "register_operand") + (match_operand:V16SI 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_operand") + (match_operand:V16SI 3 "register_operand") + (match_operand:HI 4 "register_operand")] + "TARGET_AVX512F" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3), + GEN_INT (((mask >> 0) & 3) + 4), + GEN_INT (((mask >> 2) & 3) + 4), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4), + GEN_INT (((mask >> 0) & 3) + 8), + GEN_INT (((mask >> 2) & 3) + 8), + GEN_INT (((mask >> 4) & 3) + 8), + GEN_INT (((mask >> 6) & 3) + 8), + GEN_INT (((mask >> 0) & 3) + 12), + GEN_INT (((mask >> 2) & 3) + 12), + GEN_INT (((mask >> 4) & 3) + 12), + GEN_INT (((mask >> 6) & 3) + 12), + operands[3], operands[4])); + DONE; +}) + +(define_insn "avx512f_pshufd_1<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (vec_select:V16SI (match_operand:V16SI 1 "nonimmediate_operand" "vm") @@ -8830,7 +9653,7 @@ mask |= INTVAL (operands[5]) << 6; operands[2] = GEN_INT (mask); - return "vpshufd\t{%2, %1, %0|%0, %1, %2}"; + return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}"; } [(set_attr "type" "sselog1") (set_attr "prefix" "evex") @@ -10281,12 +11104,12 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*abs<mode>2" +(define_insn "<mask_codefor>abs<mode>2<mask_name>" [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v") (abs:VI124_AVX2_48_AVX512F (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))] - "TARGET_SSSE3" - "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" + "TARGET_SSSE3 && <mask_mode512bit_condition>" + "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sselog1") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" 
"1") @@ -10640,12 +11463,12 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "avx512f_<code>v16qiv16si2" +(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (any_extend:V16SI (match_operand:V16QI 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}" + "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -10680,12 +11503,12 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "avx512f_<code>v16hiv16si2" +(define_insn "avx512f_<code>v16hiv16si2<mask_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (any_extend:V16SI (match_operand:V16HI 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vpmov<extsuffix>wd\t{%1, %0|%0, %1}" + "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -10715,7 +11538,7 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "avx512f_<code>v8qiv8di2" +(define_insn "avx512f_<code>v8qiv8di2<mask_name>" [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (vec_select:V8QI @@ -10725,7 +11548,7 @@ (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_AVX512F" - "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}" + "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -10757,12 +11580,12 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "avx512f_<code>v8hiv8di2" +(define_insn "avx512f_<code>v8hiv8di2<mask_name>" [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8HI 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}" + "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -10794,12 +11617,12 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "avx512f_<code>v8siv8di2" +(define_insn "avx512f_<code>v8siv8di2<mask_name>" [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8SI 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vpmov<extsuffix>dq\t{%1, %0|%0, %1}" + "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -11582,33 +12405,33 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "*avx512er_exp2<mode>" +(define_insn "avx512er_exp2<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] UNSPEC_EXP2))] "TARGET_AVX512ER" - "vexp2<ssemodesuffix>\t{%1, %0|%0, %1}" + "vexp2<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "*avx512er_rcp28<mode>" +(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] UNSPEC_RCP28))] "TARGET_AVX512ER" - "vrcp28<ssemodesuffix>\t{%1, %0|%0, %1}" + "vrcp28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn 
"avx512er_rsqrt28<mode>" +(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] UNSPEC_RSQRT28))] "TARGET_AVX512ER" - "vrsqrt28<ssemodesuffix>\t{%1, %0|%0, %1}" + "vrsqrt28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -12658,16 +13481,16 @@ (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx2_avx512f>_permvar<mode>" +(define_insn "<avx2_avx512f>_permvar<mode><mask_name>" [(set (match_operand:VI48F_256_512 0 "register_operand" "=v") (unspec:VI48F_256_512 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm") (match_operand:<sseintvecmode> 2 "register_operand" "v")] UNSPEC_VPERMVAR))] - "TARGET_AVX2" - "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" + "TARGET_AVX2 && <mask_mode512bit_condition>" + "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}" [(set_attr "type" "sselog") - (set_attr "prefix" "vex") + (set_attr "prefix" "<mask_prefix2>") (set_attr "mode" "<sseinsnmode>")]) (define_expand "<avx2_avx512f>_perm<mode>" @@ -12678,14 +13501,32 @@ { int mask = INTVAL (operands[2]); emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1], - GEN_INT ((mask >> 0) & 3), - GEN_INT ((mask >> 2) & 3), - GEN_INT ((mask >> 4) & 3), - GEN_INT ((mask >> 6) & 3))); + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_expand "avx512f_perm<mode>_mask" + [(match_operand:V8FI 0 "register_operand") + (match_operand:V8FI 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_operand") + (match_operand:V8FI 3 "vector_move_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX512F" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3), + operands[3], operands[4])); DONE; }) -(define_insn "<avx2_avx512f>_perm<mode>_1" +(define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>" [(set (match_operand:VI8F_256_512 0 "register_operand" "=v") (vec_select:VI8F_256_512 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm") @@ -12693,7 +13534,7 @@ (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") (match_operand 5 "const_0_to_3_operand")])))] - "TARGET_AVX2" + "TARGET_AVX2 && <mask_mode512bit_condition>" { int mask = 0; mask |= INTVAL (operands[2]) << 0; @@ -12701,10 +13542,10 @@ mask |= INTVAL (operands[4]) << 4; mask |= INTVAL (operands[5]) << 6; operands[2] = GEN_INT (mask); - return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"; + return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; } [(set_attr "type" "sselog") - (set_attr "prefix" "vex") + (set_attr "prefix" "<mask_prefix2>") (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx2_permv2ti" @@ -12751,58 +13592,58 @@ (set_attr "isa" "*,avx2,noavx2") (set_attr "mode" "V8SF")]) -(define_insn "avx512f_vec_dup<mode>" +(define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>" [(set (match_operand:VI48F_512 0 "register_operand" "=v") (vec_duplicate:VI48F_512 (vec_select:<ssescalarmode> (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") (parallel [(const_int 0)]))))] "TARGET_AVX512F" - 
"v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}" + "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_broadcast<mode>" +(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>" [(set (match_operand:V16FI 0 "register_operand" "=v,v") (vec_duplicate:V16FI (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))] "TARGET_AVX512F" "@ - vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0|%0, %g1, %g1, 0x0} - vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}" + vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0} + vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_broadcast<mode>" +(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>" [(set (match_operand:V8FI 0 "register_operand" "=v,v") (vec_duplicate:V8FI (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))] "TARGET_AVX512F" "@ - vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0|%0, %g1, %g1, 0x44} - vbroadcast<shuffletype>64x4\t{%1, %0|%0, %1}" + vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44} + vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_vec_dup_gpr<mode>" +(define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>" [(set (match_operand:VI48_512 0 "register_operand" "=v") (vec_duplicate:VI48_512 (match_operand:<ssescalarmode> 1 "register_operand" "r")))] "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)" - "vpbroadcast<bcstscalarsuff>\t{%1, %0|%0, %1}" + "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_vec_dup_mem<mode>" +(define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>" [(set (match_operand:VI48F_512 0 "register_operand" "=v") (vec_duplicate:VI48F_512 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}" + "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -12942,12 +13783,12 @@ elt * GET_MODE_SIZE (<ssescalarmode>mode)); }) -(define_expand "<sse2_avx_avx512f>_vpermil<mode>" +(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>" [(set (match_operand:VF2 0 "register_operand") (vec_select:VF2 (match_operand:VF2 1 "nonimmediate_operand") (match_operand:SI 2 "const_0_to_255_operand")))] - "TARGET_AVX" + "TARGET_AVX && <mask_mode512bit_condition>" { int mask = INTVAL (operands[2]); rtx perm[<ssescalarnum>]; @@ -12963,12 +13804,12 @@ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); }) -(define_expand "<sse2_avx_avx512f>_vpermil<mode>" +(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>" [(set (match_operand:VF1 0 "register_operand") (vec_select:VF1 (match_operand:VF1 1 "nonimmediate_operand") (match_operand:SI 2 "const_0_to_255_operand")))] - "TARGET_AVX" + "TARGET_AVX && <mask_mode512bit_condition>" { int mask = INTVAL (operands[2]); rtx perm[<ssescalarnum>]; @@ -12986,37 
+13827,37 @@ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); }) -(define_insn "*<sse2_avx_avx512f>_vpermilp<mode>" +(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>" [(set (match_operand:VF 0 "register_operand" "=v") (vec_select:VF (match_operand:VF 1 "nonimmediate_operand" "vm") (match_parallel 2 "" [(match_operand 3 "const_int_operand")])))] - "TARGET_AVX + "TARGET_AVX && <mask_mode512bit_condition> && avx_vpermilp_parallel (operands[2], <MODE>mode)" { int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1; operands[2] = GEN_INT (mask); - return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"; + return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"; } [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3" +(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>" [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF [(match_operand:VF 1 "register_operand" "v") (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")] UNSPEC_VPERMIL))] - "TARGET_AVX" - "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_AVX && <mask_mode512bit_condition>" + "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "btver2_decode" "vector") - (set_attr "prefix" "vex") + (set_attr "prefix" "<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx512f_vpermi2var<mode>3" @@ -13032,6 +13873,22 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "avx512f_vpermi2var<mode>3_mask" + [(set (match_operand:VI48F_512 0 "register_operand" "=v") + (vec_merge:VI48F_512 + (unspec:VI48F_512 + [(match_operand:VI48F_512 1 "register_operand" "v") + (match_operand:<sseintvecmode> 2 "register_operand" "0") + (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMI2_MASK) + (match_dup 0) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" + [(set_attr "type" "sselog") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "avx512f_vpermt2var<mode>3" [(set (match_operand:VI48F_512 0 "register_operand" "=v") (unspec:VI48F_512 @@ -13045,6 +13902,22 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "avx512f_vpermt2var<mode>3_mask" + [(set (match_operand:VI48F_512 0 "register_operand" "=v") + (vec_merge:VI48F_512 + (unspec:VI48F_512 + [(match_operand:<sseintvecmode> 1 "register_operand" "v") + (match_operand:VI48F_512 2 "register_operand" "0") + (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMT2) + (match_dup 2) + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] + "TARGET_AVX512F" + "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" + [(set_attr "type" "sselog") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + (define_expand "avx_vperm2f128<mode>3" [(set (match_operand:AVX256MODE2P 0 "register_operand") (unspec:AVX256MODE2P @@ -13435,24 +14308,24 @@ DONE; }) -(define_insn "<avx2_avx512f>_ashrv<mode>" +(define_insn "<avx2_avx512f>_ashrv<mode><mask_name>" [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v") (ashiftrt:VI48_AVX512F (match_operand:VI48_AVX512F 1 "register_operand" "v") 
(match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))] - "TARGET_AVX2" - "vpsrav<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_AVX2 && <mask_mode512bit_condition>" + "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix" "maybe_evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx2_avx512f>_<shift_insn>v<mode>" +(define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>" [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v") (any_lshift:VI48_AVX2_48_AVX512F (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v") (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))] - "TARGET_AVX2" - "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_AVX2 && <mask_mode512bit_condition>" + "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix" "maybe_evex") (set_attr "mode" "<sseinsnmode>")]) @@ -13535,12 +14408,13 @@ (set_attr "btver2_decode" "double") (set_attr "mode" "V8SF")]) -(define_insn "avx512f_vcvtph2ps512" +(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") - (unspec:V16SF [(match_operand:V16HI 1 "nonimmediate_operand" "vm")] - UNSPEC_VCVTPH2PS))] + (unspec:V16SF + [(match_operand:V16HI 1 "nonimmediate_operand" "vm")] + UNSPEC_VCVTPH2PS))] "TARGET_AVX512F" - "vcvtph2ps\t{%1, %0|%0, %1}" + "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -13591,13 +14465,14 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "V8SF")]) -(define_insn "avx512f_vcvtps2ph512" +(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>" [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm") - (unspec:V16HI [(match_operand:V16SF 1 "register_operand" "v") - (match_operand:SI 2 "const_0_to_255_operand" "N")] - UNSPEC_VCVTPS2PH))] + (unspec:V16HI + [(match_operand:V16SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] + UNSPEC_VCVTPS2PH))] "TARGET_AVX512F" - "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" + "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -13987,14 +14862,55 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx512f_getmant<mode>" +(define_insn "avx512f_compress<mode>_mask" + [(set (match_operand:VI48F_512 0 "register_operand" "=v") + (unspec:VI48F_512 + [(match_operand:VI48F_512 1 "register_operand" "v") + (match_operand:VI48F_512 2 "vector_move_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "k")] + UNSPEC_COMPRESS))] + "TARGET_AVX512F" + "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512f_compressstore<mode>_mask" + [(set (match_operand:VI48F_512 0 "memory_operand" "=m") + (unspec:VI48F_512 + [(match_operand:VI48F_512 1 "register_operand" "x") + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand" "k")] + UNSPEC_COMPRESS_STORE))] + "TARGET_AVX512F" + "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512f_expand<mode>_mask" + 
[(set (match_operand:VI48F_512 0 "register_operand" "=v,v") + (unspec:VI48F_512 + [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m") + (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")] + UNSPEC_EXPAND))] + "TARGET_AVX512F" + "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "none,load") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512f_getmant<mode><mask_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_15_operand")] UNSPEC_GETMANT))] "TARGET_AVX512F" - "vgetmant<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"; + "vgetmant<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"; [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -14013,23 +14929,23 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "clz<mode>2" +(define_insn "clz<mode>2<mask_name>" [(set (match_operand:VI48_512 0 "register_operand" "=v") (clz:VI48_512 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512CD" - "vplzcnt<ssemodesuffix>\t{%1, %0|%0, %1}" + "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "conflict<mode>" +(define_insn "<mask_codefor>conflict<mode><mask_name>" [(set (match_operand:VI48_512 0 "register_operand" "=v") (unspec:VI48_512 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")] UNSPEC_CONFLICT))] "TARGET_AVX512CD" - "vpconflict<ssemodesuffix>\t{%1, %0|%0, %1}" + "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) diff --git a/gcc/config/i386/ssemath.h b/gcc/config/i386/ssemath.h index 4e4656fa0d1..83abfddfeb7 100644 --- a/gcc/config/i386/ssemath.h +++ b/gcc/config/i386/ssemath.h @@ -19,6 +19,10 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_FPMATH_DEFAULT #define TARGET_FPMATH_DEFAULT (TARGET_SSE2 ? FPMATH_SSE : FPMATH_387) +#undef TARGET_FPMATH_DEFAULT_P +#define TARGET_FPMATH_DEFAULT_P(x) \ + (TARGET_SSE2_P(x) ? FPMATH_SSE : FPMATH_387) + #undef TARGET_SUBTARGET32_ISA_DEFAULT #define TARGET_SUBTARGET32_ISA_DEFAULT \ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2) diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md new file mode 100644 index 00000000000..6b45d058f22 --- /dev/null +++ b/gcc/config/i386/subst.md @@ -0,0 +1,56 @@ +;; GCC machine description for AVX512F instructions +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
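;; Editorial sketch, not part of the patch: the "mask" define_subst at
;; the end of this new file is what generates the "_mask" variants of
;; every sse.md pattern above whose name carries <mask_name>.  For such
;; a pattern, gensupport emits a second, "_mask"-suffixed insn whose
;; right-hand side is wrapped as
;;
;;   (vec_merge (original RHS)
;;              (match_operand N "vector_move_operand" "0C")            ; merge value
;;              (match_operand:<avx512fmaskmode> N+1 "register_operand" "k"))  ; mask
;;
;; where N is one past the pattern's last operand.  Per element this
;; behaves like the C expression
;;
;;   dst[i] = (k >> i) & 1 ? result[i] : merge[i];
;;
;; the "0C" constraint allowing the merge operand to be either the old
;; destination contents (merge masking) or a zero vector (zero masking).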
+ +;; Some iterators for extending subst as much as possible +;; All vectors (Use it for destination) +(define_mode_iterator SUBST_V + [V16QI + V16HI V8HI + V16SI V8SI V4SI + V8DI V4DI V2DI + V16SF V8SF V4SF + V8DF V4DF V2DF]) + +(define_subst_attr "mask_name" "mask" "" "_mask") +(define_subst_attr "mask_applied" "mask" "false" "true") +(define_subst_attr "mask_operand2" "mask" "" "%{%3%}%N2") +(define_subst_attr "mask_operand3" "mask" "" "%{%4%}%N3") +(define_subst_attr "mask_operand3_1" "mask" "" "%%{%%4%%}%%N3") ;; for sprintf +(define_subst_attr "mask_operand4" "mask" "" "%{%5%}%N4") +(define_subst_attr "mask_operand6" "mask" "" "%{%7%}%N6") +(define_subst_attr "mask_operand11" "mask" "" "%{%12%}%N11") +(define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18") +(define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19") +(define_subst_attr "mask_codefor" "mask" "*" "") +(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)") +(define_subst_attr "store_mask_constraint" "mask" "vm" "v") +(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand") +(define_subst_attr "mask_prefix" "mask" "vex" "evex") +(define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex") +(define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex") + +(define_subst "mask" + [(set (match_operand:SUBST_V 0) + (match_operand:SUBST_V 1))] + "TARGET_AVX512F" + [(set (match_dup 0) + (vec_merge:SUBST_V + (match_dup 1) + (match_operand:SUBST_V 2 "vector_move_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]) diff --git a/gcc/config/i386/winnt-cxx.c b/gcc/config/i386/winnt-cxx.c index 92de46abd59..d466299abed 100644 --- a/gcc/config/i386/winnt-cxx.c +++ b/gcc/config/i386/winnt-cxx.c @@ -23,6 +23,8 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stringpool.h" +#include "attribs.h" #include "cp/cp-tree.h" /* This is why we're a separate module. */ #include "flags.h" #include "tm_p.h" diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c index 94155d89a7f..2c1677eec88 100644 --- a/gcc/config/i386/winnt.c +++ b/gcc/config/i386/winnt.c @@ -27,6 +27,8 @@ along with GCC; see the file COPYING3. If not see #include "hard-reg-set.h" #include "output.h" #include "tree.h" +#include "stringpool.h" +#include "varasm.h" #include "flags.h" #include "tm_p.h" #include "diagnostic-core.h" diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 23879f9ec18..4c13c3a0ec6 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -318,12 +318,12 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead of a sequence loading registers by parts. */ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", - m_COREI7 | m_COREI7_AVX | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM | m_GENERIC) + m_COREI7 | m_COREI7_AVX | m_HASWELL | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM | m_GENERIC) /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead of a sequence loading registers by parts. */ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", - m_COREI7 | m_COREI7_AVX | m_BDVER | m_SLM | m_GENERIC) + m_COREI7 | m_COREI7_AVX | m_HASWELL | m_BDVER | m_SLM | m_GENERIC) /* Use packed single precision instructions where posisble. I.e. movups instead of movupd. 
*/ @@ -376,15 +376,15 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ /*****************************************************************************/ -/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if true, unaligned loads are +/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if false, unaligned loads are split. */ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal", - ~(m_COREI7 | m_GENERIC)) + ~(m_COREI7 | m_COREI7_AVX | m_GENERIC)) -/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if true, unaligned loads are +/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if false, unaligned stores are split. */ -DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_load_optimal", - ~(m_COREI7 | m_BDVER | m_GENERIC)) +DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal", + ~(m_COREI7 | m_COREI7_AVX | m_BDVER | m_GENERIC)) /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for the auto-vectorizer. */ diff --git a/gcc/config/ia64/ia64-c.c b/gcc/config/ia64/ia64-c.c index 4d4dbc84369..6489668b7d9 100644 --- a/gcc/config/ia64/ia64-c.c +++ b/gcc/config/ia64/ia64-c.c @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stringpool.h" #include "cpplib.h" #include "c-family/c-common.h" #include "c-family/c-pragma.h" diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 1bf3e2fe928..71bc666b685 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -25,6 +25,10 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" #include "regs.h" #include "hard-reg-set.h" #include "insn-config.h" @@ -50,6 +54,7 @@ along with GCC; see the file COPYING3. If not see #include "hash-table.h" #include "langhooks.h" #include "gimple.h" +#include "gimplify.h" #include "intl.h" #include "df.h" #include "debug.h" @@ -1524,7 +1529,9 @@ ia64_split_tmode_move (rtx operands[]) the value it points to. In that case we have to do the loads in the appropriate order so that the pointer is not destroyed too early. Also we must not generate a postmodify for that second - load, or rws_access_regno will die. */ + load, or rws_access_regno will die. And we must not generate a + postmodify for the second load if the destination register + overlaps with the base register. 
*/ if (GET_CODE (operands[1]) == MEM && reg_overlap_mentioned_p (operands[0], operands[1])) { @@ -1534,7 +1541,11 @@ ia64_split_tmode_move (rtx operands[]) if (REGNO (base) == REGNO (operands[0])) reversed = true; - dead = true; + + if (refers_to_regno_p (REGNO (operands[0]), + REGNO (operands[0])+2, + base, 0)) + dead = true; } /* Another reason to do the moves in reversed order is if the first element of the target register pair is also the second element of @@ -3481,7 +3492,7 @@ ia64_expand_prologue (void) edge e; edge_iterator ei; - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) if ((e->flags & EDGE_FAKE) == 0 && (e->flags & EDGE_FALLTHRU) != 0) break; @@ -10176,7 +10187,8 @@ ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn) if (NOTE_INSN_BASIC_BLOCK_P (insn)) { - last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR; + last_block = NOTE_BASIC_BLOCK (insn)->next_bb + == EXIT_BLOCK_PTR_FOR_FN (cfun); /* Restore unwind state from immediately before the epilogue. */ if (need_copy_state) diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md index af7bc8ee4c2..31530be906d 100644 --- a/gcc/config/ia64/predicates.md +++ b/gcc/config/ia64/predicates.md @@ -72,9 +72,9 @@ t = DECL_SIZE_UNIT (t); else t = TYPE_SIZE_UNIT (TREE_TYPE (t)); - if (t && host_integerp (t, 0)) + if (t && tree_fits_shwi_p (t)) { - size = tree_low_cst (t, 0); + size = tree_to_shwi (t); if (size < 0) size = 0; } diff --git a/gcc/config/iq2000/iq2000.c b/gcc/config/iq2000/iq2000.c index da6f757d6f9..e6d1171ca85 100644 --- a/gcc/config/iq2000/iq2000.c +++ b/gcc/config/iq2000/iq2000.c @@ -22,6 +22,9 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" @@ -1279,7 +1282,7 @@ iq2000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, if (! type || TREE_CODE (type) != RECORD_TYPE || ! named || ! TYPE_SIZE_UNIT (type) - || ! host_integerp (TYPE_SIZE_UNIT (type), 1)) + || ! tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))) ret = gen_rtx_REG (mode, regbase + *arg_words + bias); else { @@ -1289,7 +1292,7 @@ iq2000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, if (TREE_CODE (field) == FIELD_DECL && TREE_CODE (TREE_TYPE (field)) == REAL_TYPE && TYPE_PRECISION (TREE_TYPE (field)) == BITS_PER_WORD - && host_integerp (bit_position (field), 0) + && tree_fits_shwi_p (bit_position (field)) && int_bit_position (field) % BITS_PER_WORD == 0) break; @@ -1307,7 +1310,7 @@ iq2000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, /* ??? If this is a packed structure, then the last hunk won't be 64 bits. 
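The host_integerp/tree_low_cst rewrites running through these hunks (ia64's predicates.md, iq2000.c, and several more files below) all apply one mechanical rule: the signedness flag moves out of the argument list and into the function name. The before/after idiom, shown as GCC-internal fragments rather than standalone code:

/* GCC 4.8 spelling: the second argument selects signedness,
   1 = unsigned, 0 = signed.  */
if (t && host_integerp (t, 0))
  size = tree_low_cst (t, 0);

/* GCC 4.9 spelling: tree_fits_shwi_p/tree_to_shwi for the signed
   case, tree_fits_uhwi_p/tree_to_uhwi for the unsigned one.  */
if (t && tree_fits_shwi_p (t))
  size = tree_to_shwi (t);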
*/ chunks - = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; + = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD; if (chunks + *arg_words + bias > (unsigned) MAX_ARGS_IN_REGISTERS) chunks = MAX_ARGS_IN_REGISTERS - *arg_words - bias; diff --git a/gcc/config/lm32/lm32.c b/gcc/config/lm32/lm32.c index 6483a03e57d..6bddc488727 100644 --- a/gcc/config/lm32/lm32.c +++ b/gcc/config/lm32/lm32.c @@ -35,6 +35,7 @@ #include "recog.h" #include "output.h" #include "tree.h" +#include "calls.h" #include "expr.h" #include "flags.h" #include "reload.h" diff --git a/gcc/config/m32c/cond.md b/gcc/config/m32c/cond.md index 928d7efb289..1fae01d9836 100644 --- a/gcc/config/m32c/cond.md +++ b/gcc/config/m32c/cond.md @@ -75,7 +75,7 @@ ) (define_insn "stzx_16" - [(set (match_operand:QI 0 "mrai_operand" "=R0w,R0w,R0w") + [(set (match_operand:QI 0 "register_operand" "=R0w,R0w,R0w") (if_then_else:QI (eq (reg:CC FLG_REGNO) (const_int 0)) (match_operand:QI 1 "const_int_operand" "i,i,0") (match_operand:QI 2 "const_int_operand" "i,0,i")))] @@ -88,7 +88,7 @@ ) (define_insn "stzx_24_<mode>" - [(set (match_operand:QHI 0 "mrai_operand" "=RraSd,RraSd,RraSd") + [(set (match_operand:QHI 0 "mra_operand" "=RraSd,RraSd,RraSd") (if_then_else:QHI (eq (reg:CC FLG_REGNO) (const_int 0)) (match_operand:QHI 1 "const_int_operand" "i,i,0") (match_operand:QHI 2 "const_int_operand" "i,0,i")))] diff --git a/gcc/config/m32c/m32c-pragma.c b/gcc/config/m32c/m32c-pragma.c index 6b0d05a8aaf..aa16a30453d 100644 --- a/gcc/config/m32c/m32c-pragma.c +++ b/gcc/config/m32c/m32c-pragma.c @@ -46,9 +46,9 @@ m32c_pragma_memregs (cpp_reader * reader ATTRIBUTE_UNUSED) type = pragma_lex (&val); if (type == CPP_NUMBER) { - if (host_integerp (val, 1)) + if (tree_fits_uhwi_p (val)) { - i = tree_low_cst (val, 1); + i = tree_to_uhwi (val); type = pragma_lex (&val); if (type != CPP_EOF) @@ -95,7 +95,7 @@ m32c_pragma_address (cpp_reader * reader ATTRIBUTE_UNUSED) { if (var != error_mark_node) { - unsigned uaddr = tree_low_cst (addr, 1); + unsigned uaddr = tree_to_uhwi (addr); m32c_note_pragma_address (IDENTIFIER_POINTER (var), uaddr); } diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c index deac40c228f..ec30b8d7f9b 100644 --- a/gcc/config/m32c/m32c.c +++ b/gcc/config/m32c/m32c.c @@ -36,6 +36,9 @@ #include "diagnostic-core.h" #include "obstack.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" #include "expr.h" #include "optabs.h" #include "except.h" diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c index c94da538fcf..6cee5d728b3 100644 --- a/gcc/config/m32r/m32r.c +++ b/gcc/config/m32r/m32r.c @@ -22,6 +22,10 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "stringpool.h" +#include "calls.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c index 7035504bfe3..2b5bc22ecb2 100644 --- a/gcc/config/m68k/m68k.c +++ b/gcc/config/m68k/m68k.c @@ -22,6 +22,9 @@ along with GCC; see the file COPYING3. 
If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "calls.h" +#include "stor-layout.h" +#include "varasm.h" #include "rtl.h" #include "function.h" #include "regs.h" @@ -514,7 +517,7 @@ m68k_option_override (void) { enum target_device dev; dev = all_microarchs[M68K_DEFAULT_TUNE].device; - m68k_tune_flags = all_devices[dev]->flags; + m68k_tune_flags = all_devices[dev].flags; } #endif else diff --git a/gcc/config/mcore/mcore.c b/gcc/config/mcore/mcore.c index 6550b6905f0..6bd60702fa2 100644 --- a/gcc/config/mcore/mcore.c +++ b/gcc/config/mcore/mcore.c @@ -23,6 +23,10 @@ #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "stringpool.h" +#include "calls.h" #include "tm_p.h" #include "mcore.h" #include "regs.h" diff --git a/gcc/config/mep/mep-pragma.c b/gcc/config/mep/mep-pragma.c index 8a9c577f5a9..45a4b4496a4 100644 --- a/gcc/config/mep/mep-pragma.c +++ b/gcc/config/mep/mep-pragma.c @@ -232,9 +232,9 @@ mep_pragma_coprocessor_width (void) switch (type) { case CPP_NUMBER: - if (! host_integerp (val, 1)) + if (! tree_fits_uhwi_p (val)) break; - i = tree_low_cst (val, 1); + i = tree_to_uhwi (val); /* This pragma no longer has any effect. */ #if 0 if (i == 32) @@ -273,7 +273,7 @@ mep_pragma_coprocessor_subclass (void) type = mep_pragma_lex (&val); if (type != CPP_CHAR) goto syntax_error; - class_letter = tree_low_cst (val, 1); + class_letter = tree_to_uhwi (val); if (class_letter >= 'A' && class_letter <= 'D') switch (class_letter) { diff --git a/gcc/config/mep/mep.c b/gcc/config/mep/mep.c index e67540a5611..6ce6c530077 100644 --- a/gcc/config/mep/mep.c +++ b/gcc/config/mep/mep.c @@ -24,6 +24,10 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "rtl.h" #include "tree.h" +#include "varasm.h" +#include "calls.h" +#include "stringpool.h" +#include "stor-layout.h" #include "regs.h" #include "hard-reg-set.h" #include "insn-config.h" @@ -48,6 +52,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "df.h" #include "gimple.h" +#include "gimplify.h" #include "opts.h" #include "dumpfile.h" diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c index 3258a95ef3a..93dede4d189 100644 --- a/gcc/config/microblaze/microblaze.c +++ b/gcc/config/microblaze/microblaze.c @@ -33,6 +33,9 @@ #include "insn-attr.h" #include "recog.h" #include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" #include "function.h" #include "expr.h" #include "flags.h" diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index c1d8f3a0ade..0aeb35aa6d6 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -34,6 +34,10 @@ along with GCC; see the file COPYING3. If not see #include "recog.h" #include "output.h" #include "tree.h" +#include "varasm.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" #include "function.h" #include "expr.h" #include "optabs.h" @@ -51,6 +55,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "sched-int.h" #include "gimple.h" +#include "gimplify.h" #include "bitmap.h" #include "diagnostic.h" #include "target-globals.h" @@ -3967,7 +3972,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, case DIV: /* Check for a reciprocal. 
*/ if (float_mode_p - && ISA_HAS_FP4 + && ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations && XEXP (x, 0) == CONST1_RTX (mode)) { @@ -5148,7 +5153,7 @@ mips_function_arg (cumulative_args_t cum_v, enum machine_mode mode, && type != 0 && TREE_CODE (type) == RECORD_TYPE && TYPE_SIZE_UNIT (type) - && host_integerp (TYPE_SIZE_UNIT (type), 1)) + && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))) { tree field; @@ -5157,7 +5162,7 @@ mips_function_arg (cumulative_args_t cum_v, enum machine_mode mode, if (TREE_CODE (field) == FIELD_DECL && SCALAR_FLOAT_TYPE_P (TREE_TYPE (field)) && TYPE_PRECISION (TREE_TYPE (field)) == BITS_PER_WORD - && host_integerp (bit_position (field), 0) + && tree_fits_shwi_p (bit_position (field)) && int_bit_position (field) % BITS_PER_WORD == 0) break; @@ -14837,7 +14842,7 @@ r10k_simplify_address (rtx x, rtx insn) /* Replace the incoming value of $sp with virtual_incoming_args_rtx. */ if (x == stack_pointer_rtx - && DF_REF_BB (def) == ENTRY_BLOCK_PTR) + && DF_REF_BB (def) == ENTRY_BLOCK_PTR_FOR_FN (cfun)) newx = virtual_incoming_args_rtx; } else if (dominated_by_p (CDI_DOMINATORS, DF_REF_BB (use), @@ -14926,7 +14931,7 @@ r10k_safe_address_p (rtx x, rtx insn) a link-time-constant address. */ static bool -r10k_safe_mem_expr_p (tree expr, HOST_WIDE_INT offset) +r10k_safe_mem_expr_p (tree expr, unsigned HOST_WIDE_INT offset) { HOST_WIDE_INT bitoffset, bitsize; tree inner, var_offset; @@ -14939,7 +14944,7 @@ r10k_safe_mem_expr_p (tree expr, HOST_WIDE_INT offset) return false; offset += bitoffset / BITS_PER_UNIT; - return offset >= 0 && offset < tree_low_cst (DECL_SIZE_UNIT (inner), 1); + return offset < tree_to_uhwi (DECL_SIZE_UNIT (inner)); } /* A for_each_rtx callback for which DATA points to the instruction @@ -16071,10 +16076,13 @@ mips_reorg_process_insns (void) if (crtl->profile) cfun->machine->all_noreorder_p = false; - /* Code compiled with -mfix-vr4120 or -mfix-24k can't be all noreorder - because we rely on the assembler to work around some errata. - The r5900 too has several bugs. */ - if (TARGET_FIX_VR4120 || TARGET_FIX_24K || TARGET_MIPS5900) + /* Code compiled with -mfix-vr4120, -mfix-rm7000 or -mfix-24k can't be + all noreorder because we rely on the assembler to work around some + errata. The R5900 too has several bugs. */ + if (TARGET_FIX_VR4120 + || TARGET_FIX_RM7000 + || TARGET_FIX_24K + || TARGET_MIPS5900) cfun->machine->all_noreorder_p = false; /* The same is true for -mfix-vr4130 if we might generate MFLO or diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index c4a2a4a6862..021419c0a6a 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -882,13 +882,18 @@ struct mips_cpu_info { /* This is a catch all for other mips4 instructions: indexed load, the FP madd and msub instructions, and the FP recip and recip sqrt - instructions. */ + instructions. Note that this macro should only be used by other + ISA_HAS_* macros. */ #define ISA_HAS_FP4 ((ISA_MIPS4 \ - || (ISA_MIPS32R2 && TARGET_FLOAT64) \ + || ISA_MIPS32R2 \ || ISA_MIPS64 \ || ISA_MIPS64R2) \ && !TARGET_MIPS16) +/* ISA has floating-point indexed load and store instructions + (LWXC1, LDXC1, SWXC1 and SDXC1). */ +#define ISA_HAS_LXC1_SXC1 ISA_HAS_FP4 + /* ISA has paired-single instructions. */ #define ISA_HAS_PAIRED_SINGLE (ISA_MIPS32R2 || ISA_MIPS64 || ISA_MIPS64R2) @@ -906,21 +911,34 @@ struct mips_cpu_info { #define GENERATE_MADD_MSUB (TARGET_IMADD && !TARGET_MIPS16) /* ISA has floating-point madd and msub instructions 'd = a * b [+-] c'. 
*/ -#define ISA_HAS_FP_MADD4_MSUB4 (ISA_HAS_FP4 \ - || (ISA_MIPS32R2 && !TARGET_MIPS16)) +#define ISA_HAS_FP_MADD4_MSUB4 ISA_HAS_FP4 /* ISA has floating-point madd and msub instructions 'c = a * b [+-] c'. */ #define ISA_HAS_FP_MADD3_MSUB3 TARGET_LOONGSON_2EF /* ISA has floating-point nmadd and nmsub instructions 'd = -((a * b) [+-] c)'. */ -#define ISA_HAS_NMADD4_NMSUB4 (ISA_HAS_FP4 \ - || (ISA_MIPS32R2 && !TARGET_MIPS16)) +#define ISA_HAS_NMADD4_NMSUB4 ISA_HAS_FP4 /* ISA has floating-point nmadd and nmsub instructions 'c = -((a * b) [+-] c)'. */ #define ISA_HAS_NMADD3_NMSUB3 TARGET_LOONGSON_2EF +/* ISA has floating-point RECIP.fmt and RSQRT.fmt instructions. The + MIPS64 rev. 1 ISA says that RECIP.D and RSQRT.D are unpredictable when + doubles are stored in pairs of FPRs, so for safety's sake, we apply + this restriction to the MIPS IV ISA too. */ +#define ISA_HAS_FP_RECIP_RSQRT(MODE) \ + (((ISA_HAS_FP4 \ + && ((MODE) == SFmode \ + || ((TARGET_FLOAT64 \ + || ISA_MIPS32R2 \ + || ISA_MIPS64R2) \ + && (MODE) == DFmode))) \ + || (TARGET_SB1 \ + && (MODE) == V2SFmode)) \ + && !TARGET_MIPS16) + /* ISA has count leading zeroes/ones instruction (not implemented). */ #define ISA_HAS_CLZ_CLO ((ISA_MIPS32 \ || ISA_MIPS32R2 \ @@ -991,11 +1009,7 @@ struct mips_cpu_info { 'prefx', along with TARGET_HARD_FLOAT and TARGET_DOUBLE_FLOAT. (prefx is a cop1x instruction, so can only be used if FP is enabled.) */ -#define ISA_HAS_PREFETCHX ((ISA_MIPS4 \ - || ISA_MIPS32R2 \ - || ISA_MIPS64 \ - || ISA_MIPS64R2) \ - && !TARGET_MIPS16) +#define ISA_HAS_PREFETCHX ISA_HAS_FP4 /* True if trunc.w.s and trunc.w.d are real (not synthetic) instructions. Both require TARGET_HARD_FLOAT, and trunc.w.d @@ -1167,6 +1181,7 @@ struct mips_cpu_info { %{meva} %{mno-eva} \ %{msmartmips} %{mno-smartmips} \ %{mmt} %{mno-mt} \ +%{mfix-rm7000} %{mno-fix-rm7000} \ %{mfix-vr4120} %{mfix-vr4130} \ %{mfix-24k} \ %{noasmopt:-O0; O0|fno-delayed-branch:-O1; O*:-O2; :-O1} \ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 3554beb3033..1bd1ec5e2cc 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -881,15 +881,6 @@ (define_mode_attr sqrt_condition [(SF "!ISA_MIPS1") (DF "!ISA_MIPS1") (V2SF "TARGET_SB1")]) -;; This attribute gives the conditions under which RECIP.fmt and RSQRT.fmt -;; instructions can be used. The MIPS32 and MIPS64 ISAs say that RECIP.D -;; and RSQRT.D are unpredictable when doubles are stored in pairs of FPRs, -;; so for safety's sake, we apply this restriction to all targets. -(define_mode_attr recip_condition - [(SF "ISA_HAS_FP4") - (DF "ISA_HAS_FP4 && TARGET_FLOAT64") - (V2SF "TARGET_SB1")]) - ;; This code iterator allows signed and unsigned widening multiplications ;; to use the same template. 
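The new ISA_HAS_FP_RECIP_RSQRT macro above replaces mips.md's per-mode recip_condition attribute with a single mode-parameterised test. Its decision table, restated as a standalone function (the boolean parameters stand in for the target macros and are not GCC's names; isa_r2 abbreviates ISA_MIPS32R2 || ISA_MIPS64R2):

#include <stdbool.h>

enum fp_mode { SF, DF, V2SF };

/* RECIP.D/RSQRT.D are unpredictable when a double is split across a
   pair of FPRs, hence the extra conditions before allowing DFmode.  */
static bool
recip_rsqrt_ok (enum fp_mode mode, bool has_fp4, bool float64,
                bool isa_r2, bool sb1, bool mips16)
{
  if (mips16)
    return false;
  if (sb1 && mode == V2SF)
    return true;
  if (!has_fp4)
    return false;
  return mode == SF || (mode == DF && (float64 || isa_r2));
}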
(define_code_iterator any_extend [sign_extend zero_extend]) @@ -2501,7 +2492,8 @@ "<divide_condition>" { if (const_1_operand (operands[1], <MODE>mode)) - if (!(<recip_condition> && flag_unsafe_math_optimizations)) + if (!(ISA_HAS_FP_RECIP_RSQRT (<MODE>mode) + && flag_unsafe_math_optimizations)) operands[1] = force_reg (<MODE>mode, operands[1]); }) @@ -2539,7 +2531,7 @@ [(set (match_operand:ANYF 0 "register_operand" "=f") (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") (match_operand:ANYF 2 "register_operand" "f")))] - "<recip_condition> && flag_unsafe_math_optimizations" + "ISA_HAS_FP_RECIP_RSQRT (<MODE>mode) && flag_unsafe_math_optimizations" { if (TARGET_FIX_SB1) return "recip.<fmt>\t%0,%2\;mov.<fmt>\t%0,%0"; @@ -2674,7 +2666,7 @@ [(set (match_operand:ANYF 0 "register_operand" "=f") (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] - "<recip_condition> && flag_unsafe_math_optimizations" + "ISA_HAS_FP_RECIP_RSQRT (<MODE>mode) && flag_unsafe_math_optimizations" { if (TARGET_FIX_SB1) return "rsqrt.<fmt>\t%0,%2\;mov.<fmt>\t%0,%0"; @@ -2692,7 +2684,7 @@ [(set (match_operand:ANYF 0 "register_operand" "=f") (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") (match_operand:ANYF 2 "register_operand" "f"))))] - "<recip_condition> && flag_unsafe_math_optimizations" + "ISA_HAS_FP_RECIP_RSQRT (<MODE>mode) && flag_unsafe_math_optimizations" { if (TARGET_FIX_SB1) return "rsqrt.<fmt>\t%0,%2\;mov.<fmt>\t%0,%0"; @@ -4448,7 +4440,7 @@ [(set (match_operand:ANYF 0 "register_operand" "=f") (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d") (match_operand:P 2 "register_operand" "d"))))] - "ISA_HAS_FP4" + "ISA_HAS_LXC1_SXC1" "<ANYF:loadx>\t%0,%1(%2)" [(set_attr "type" "fpidxload") (set_attr "mode" "<ANYF:UNITMODE>")]) @@ -4457,7 +4449,7 @@ [(set (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d") (match_operand:P 2 "register_operand" "d"))) (match_operand:ANYF 0 "register_operand" "f"))] - "ISA_HAS_FP4" + "ISA_HAS_LXC1_SXC1" "<ANYF:storex>\t%0,%1(%2)" [(set_attr "type" "fpidxstore") (set_attr "mode" "<ANYF:UNITMODE>")]) @@ -6776,7 +6768,7 @@ (define_insn "*mov<GPR:mode>_on_<MOVECC:mode>" [(set (match_operand:GPR 0 "register_operand" "=d,d") (if_then_else:GPR - (match_operator:MOVECC 4 "equality_operator" + (match_operator 4 "equality_operator" [(match_operand:MOVECC 1 "register_operand" "<MOVECC:reg>,<MOVECC:reg>") (const_int 0)]) (match_operand:GPR 2 "reg_or_0_operand" "dJ,0") @@ -6788,10 +6780,23 @@ [(set_attr "type" "condmove") (set_attr "mode" "<GPR:MODE>")]) +(define_insn "*mov<GPR:mode>_on_<GPR2:mode>_ne" + [(set (match_operand:GPR 0 "register_operand" "=d,d") + (if_then_else:GPR + (match_operand:GPR2 1 "register_operand" "<GPR2:reg>,<GPR2:reg>") + (match_operand:GPR 2 "reg_or_0_operand" "dJ,0") + (match_operand:GPR 3 "reg_or_0_operand" "0,dJ")))] + "ISA_HAS_CONDMOVE" + "@ + movn\t%0,%z2,%1 + movz\t%0,%z3,%1" + [(set_attr "type" "condmove") + (set_attr "mode" "<GPR:MODE>")]) + (define_insn "*mov<SCALARF:mode>_on_<MOVECC:mode>" [(set (match_operand:SCALARF 0 "register_operand" "=f,f") (if_then_else:SCALARF - (match_operator:MOVECC 4 "equality_operator" + (match_operator 4 "equality_operator" [(match_operand:MOVECC 1 "register_operand" "<MOVECC:reg>,<MOVECC:reg>") (const_int 0)]) (match_operand:SCALARF 2 "register_operand" "f,0") diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index 0324041dbea..10faf4216a5 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -165,6 
+165,10 @@ mfix-r4400 Target Report Mask(FIX_R4400) Work around certain R4400 errata +mfix-rm7000 +Target Report Var(TARGET_FIX_RM7000) +Work around certain RM7000 errata + mfix-r10000 Target Report Mask(FIX_R10000) Work around certain R10000 errata diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c index 34b4fea7503..eb43af71d08 100644 --- a/gcc/config/mmix/mmix.c +++ b/gcc/config/mmix/mmix.c @@ -31,6 +31,9 @@ along with GCC; see the file COPYING3. If not see #include "basic-block.h" #include "flags.h" #include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" #include "function.h" #include "expr.h" #include "diagnostic-core.h" diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c index df563d03eac..7304e8638c7 100644 --- a/gcc/config/mn10300/mn10300.c +++ b/gcc/config/mn10300/mn10300.c @@ -24,6 +24,9 @@ #include "tm.h" #include "rtl.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" #include "regs.h" #include "hard-reg-set.h" #include "insn-config.h" @@ -3226,7 +3229,6 @@ mn10300_loop_contains_call_insn (loop_p loop) static void mn10300_scan_for_setlb_lcc (void) { - loop_iterator liter; loop_p loop; DUMP ("Looking for loops that can use the SETLB insn", NULL_RTX); @@ -3241,7 +3243,7 @@ mn10300_scan_for_setlb_lcc (void) if an inner loop is not suitable for use with the SETLB/Lcc insns, it may be the case that its parent loop is suitable. Thus we should check all loops, but work from the innermost outwards. */ - FOR_EACH_LOOP (liter, loop, LI_ONLY_INNERMOST) + FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) { const char * reason = NULL; diff --git a/gcc/config/moxie/moxie.c b/gcc/config/moxie/moxie.c index d4f7d6d9d68..abba0aebd2d 100644 --- a/gcc/config/moxie/moxie.c +++ b/gcc/config/moxie/moxie.c @@ -36,6 +36,9 @@ #include "diagnostic-core.h" #include "obstack.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" #include "expr.h" #include "optabs.h" #include "except.h" diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c index 1ebb583c1ed..daff4ae1623 100644 --- a/gcc/config/msp430/msp430.c +++ b/gcc/config/msp430/msp430.c @@ -23,6 +23,8 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stor-layout.h" +#include "calls.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index 2700a872360..008f088df57 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -25,6 +25,9 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" @@ -4563,7 +4566,7 @@ nds32_fp_as_gp_check_available (void) || frame_pointer_needed || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) || (cfun->stdarg == 1) - || (find_fallthru_edge (EXIT_BLOCK_PTR->preds) == NULL)) + || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) return 0; /* Now we can check the possibility of using fp_as_gp optimization. 
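The mn10300 hunk above tracks another GCC 4.9 interface change folded in by this merge: FOR_EACH_LOOP no longer takes an explicit loop_iterator, so callers drop the local iterator object. The new form, as a GCC-internal fragment:

/* 4.8 required: loop_iterator liter; FOR_EACH_LOOP (liter, loop, flags)
   4.9 manages the iterator inside the macro itself.  */
loop_p loop;

FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
  {
    /* ... examine each innermost loop ... */
  }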
*/ @@ -4677,7 +4680,7 @@ nds32_output_casesi_pc_relative (rtx *operands) enum machine_mode mode; rtx diff_vec; - diff_vec = PATTERN (next_active_insn (operands[1])); + diff_vec = PATTERN (NEXT_INSN (operands[1])); gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 260830f00d7..2aa63c6bd0c 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -30,6 +30,10 @@ along with GCC; see the file COPYING3. If not see #include "insn-attr.h" #include "flags.h" #include "tree.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "varasm.h" +#include "calls.h" #include "output.h" #include "dbxout.h" #include "except.h" diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c index 0a310c50d79..42237b5d798 100644 --- a/gcc/config/pdp11/pdp11.c +++ b/gcc/config/pdp11/pdp11.c @@ -33,6 +33,9 @@ along with GCC; see the file COPYING3. If not see #include "flags.h" #include "recog.h" #include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" #include "expr.h" #include "diagnostic-core.h" #include "tm_p.h" diff --git a/gcc/config/picochip/picochip.c b/gcc/config/picochip/picochip.c index f10d1cea830..4756cb78b72 100644 --- a/gcc/config/picochip/picochip.c +++ b/gcc/config/picochip/picochip.c @@ -34,6 +34,10 @@ along with GCC; see the file COPYING3. If not, see #include "recog.h" #include "obstack.h" #include "tree.h" +#include "calls.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "varasm.h" #include "expr.h" #include "optabs.h" #include "except.h" @@ -810,7 +814,7 @@ picochip_compute_arg_size (const_tree type, enum machine_mode mode) int type_size_in_units = 0; if (type) - type_size_in_units = tree_low_cst (TYPE_SIZE_UNIT (type), 1); + type_size_in_units = tree_to_uhwi (TYPE_SIZE_UNIT (type)); else type_size_in_units = GET_MODE_SIZE (mode); diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c index f071e31daf2..72aefc205a1 100644 --- a/gcc/config/rl78/rl78.c +++ b/gcc/config/rl78/rl78.c @@ -23,6 +23,9 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 9f4b6a4b392..23a425e30cb 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -59,8 +59,6 @@ UNSPEC_VSUMSWS UNSPEC_VPERM UNSPEC_VPERM_UNS - UNSPEC_VPERM_X - UNSPEC_VPERM_UNS_X UNSPEC_VRFIN UNSPEC_VCFUX UNSPEC_VCFSX @@ -1393,91 +1391,21 @@ "vrfiz %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn_and_split "altivec_vperm_<mode>" +(define_insn "altivec_vperm_<mode>" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] - UNSPEC_VPERM_X))] - "TARGET_ALTIVEC" - "#" - "!reload_in_progress && !reload_completed" - [(set (match_dup 0) (match_dup 4))] -{ - if (BYTES_BIG_ENDIAN) - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[1], - operands[2], operands[3]), - UNSPEC_VPERM); - else - { - /* We want to subtract from 31, but we can't vspltisb 31 since - it's out of range. -1 works as well because only the low-order - five bits of the permute control vector elements are used. 
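The split deleted here implemented the comment just above it: on little-endian targets the vperm selector was complemented by subtracting from -1, which equals 31 - sel in the five bits vperm reads, and the two source vectors were swapped. A standalone scalar model of that adjustment (byte indexing simplified for illustration; not the GCC code):

#include <stdint.h>

static void
le_adjusted_vperm (uint8_t *dst, const uint8_t *a, const uint8_t *b,
                   const uint8_t *sel)
{
  for (int i = 0; i < 16; i++)
    {
      /* (uint8_t) (-1 - sel[i]) & 31  ==  31 - (sel[i] & 31).  */
      uint8_t s = (uint8_t) (-1 - sel[i]) & 31;
      /* Sources swapped: selector values 0-15 now pick from B,
         16-31 from A.  */
      dst[i] = s < 16 ? b[s] : a[s - 16];
    }
}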
*/ - rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, - gen_rtx_CONST_INT (QImode, -1)); - rtx tmp = gen_reg_rtx (V16QImode); - emit_move_insn (tmp, splat); - rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); - emit_move_insn (tmp, sel); - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[2], - operands[1], tmp), - UNSPEC_VPERM); - } -} - [(set_attr "type" "vecperm")]) - -(define_insn "*altivec_vperm_<mode>_internal" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn_and_split "altivec_vperm_<mode>_uns" +(define_insn "altivec_vperm_<mode>_uns" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] - UNSPEC_VPERM_UNS_X))] - "TARGET_ALTIVEC" - "#" - "!reload_in_progress && !reload_completed" - [(set (match_dup 0) (match_dup 4))] -{ - if (BYTES_BIG_ENDIAN) - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[1], - operands[2], operands[3]), - UNSPEC_VPERM_UNS); - else - { - /* We want to subtract from 31, but we can't vspltisb 31 since - it's out of range. -1 works as well because only the low-order - five bits of the permute control vector elements are used. */ - rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, - gen_rtx_CONST_INT (QImode, -1)); - rtx tmp = gen_reg_rtx (V16QImode); - emit_move_insn (tmp, splat); - rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); - emit_move_insn (tmp, sel); - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[2], - operands[1], tmp), - UNSPEC_VPERM_UNS); - } -} - [(set_attr "type" "vecperm")]) - -(define_insn "*altivec_vperm_<mode>_uns_internal" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3" diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index c1adbd78a4d..66b483ec116 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -25,9 +25,6 @@ #ifndef RS6000_BI_ARCH -#undef DEFAULT_ABI -#define DEFAULT_ABI ABI_AIX - #undef TARGET_64BIT #define TARGET_64BIT 1 @@ -74,7 +71,11 @@ extern int dot_symbols; #undef PROCESSOR_DEFAULT #define PROCESSOR_DEFAULT PROCESSOR_POWER7 #undef PROCESSOR_DEFAULT64 +#ifdef LINUX64_DEFAULT_ABI_ELFv2 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8 +#else #define PROCESSOR_DEFAULT64 PROCESSOR_POWER7 +#endif /* We don't need to generate entries in .fixup, except when -mrelocatable or -mrelocatable-lib is given. 
*/ @@ -88,6 +89,12 @@ extern int dot_symbols; #define INVALID_64BIT "-m%s not supported in this configuration" #define INVALID_32BIT INVALID_64BIT +#ifdef LINUX64_DEFAULT_ABI_ELFv2 +#define ELFv2_ABI_CHECK (rs6000_elf_abi != 1) +#else +#define ELFv2_ABI_CHECK (rs6000_elf_abi == 2) +#endif + #undef SUBSUBTARGET_OVERRIDE_OPTIONS #define SUBSUBTARGET_OVERRIDE_OPTIONS \ do \ @@ -102,6 +109,12 @@ extern int dot_symbols; error (INVALID_64BIT, "call"); \ } \ dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \ + if (ELFv2_ABI_CHECK) \ + { \ + rs6000_current_abi = ABI_ELFv2; \ + if (dot_symbols) \ + error ("-mcall-aixdesc incompatible with -mabi=elfv2"); \ + } \ if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ { \ rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ @@ -355,7 +368,11 @@ extern int dot_symbols; #define LINK_OS_DEFAULT_SPEC "%(link_os_linux)" #define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1" -#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld64.so.1" +#ifdef LINUX64_DEFAULT_ABI_ELFv2 +#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:/lib64/ld64.so.1;:/lib64/ld64.so.2}" +#else +#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:/lib64/ld64.so.2;:/lib64/ld64.so.1}" +#endif #define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0" #define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0" #if DEFAULT_LIBC == LIBC_UCLIBC diff --git a/gcc/config/rs6000/option-defaults.h b/gcc/config/rs6000/option-defaults.h index 2d7e36a6137..0d7ba1ea3ac 100644 --- a/gcc/config/rs6000/option-defaults.h +++ b/gcc/config/rs6000/option-defaults.h @@ -54,6 +54,7 @@ --with-float is ignored if -mhard-float or -msoft-float are specified. */ #define OPTION_DEFAULT_SPECS \ + {"abi", "%{!mabi=elfv*:-mabi=%(VALUE)}" }, \ {"tune", "%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}" }, \ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \ diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h index db490b6c9b8..8108efd07f5 100644 --- a/gcc/config/rs6000/ppc-asm.h +++ b/gcc/config/rs6000/ppc-asm.h @@ -256,7 +256,30 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see * the real function with one or two leading periods respectively. */ -#if defined (__powerpc64__) +#if defined(__powerpc64__) && _CALL_ELF == 2 + +/* Defining "toc" above breaks @toc in assembler code. 
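The ELFv2_ABI_CHECK introduced in the linux64.h hunk above inverts its sense with the configure-time default. Restated as a standalone predicate, assuming the 0/1/2 encoding of -mabi= matches what rs6000_elf_abi holds:

#include <stdbool.h>

/* When configured with LINUX64_DEFAULT_ABI_ELFv2, ELFv2 is the
   default and only an explicit -mabi=elfv1 opts out; otherwise an
   explicit -mabi=elfv2 opts in.  */
static bool
elfv2_selected (bool default_is_elfv2,
                int mabi /* 0 = none, 1 = elfv1, 2 = elfv2 */)
{
  return default_is_elfv2 ? mabi != 1 : mabi == 2;
}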
*/ +#undef toc + +#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): \ +0: addis 2,12,(.TOC.-0b)@ha; \ + addi 2,2,(.TOC.-0b)@l; \ + .localentry FUNC_NAME(name),.-FUNC_NAME(name) + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden FUNC_NAME(name); + +#define FUNC_END(name) \ + .size FUNC_NAME(name),.-FUNC_NAME(name) + +#elif defined (__powerpc64__) + #define FUNC_NAME(name) GLUE(.,name) #define JUMP_TARGET(name) FUNC_NAME(name) #define FUNC_START(name) \ diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index b5bff044779..c3118beb516 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1050,7 +1050,8 @@ (and (match_code "symbol_ref") (match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op)) && ((SYMBOL_REF_LOCAL_P (op) - && (DEFAULT_ABI != ABI_AIX + && ((DEFAULT_ABI != ABI_AIX + && DEFAULT_ABI != ABI_ELFv2) || !SYMBOL_REF_EXTERNAL_P (op))) || (op == XEXP (DECL_RTL (current_function_decl), 0)))"))) @@ -1538,6 +1539,26 @@ return 1; }) +;; Return 1 if OP is valid for crsave insn, known to be a PARALLEL. +(define_predicate "crsave_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + int i; + + for (i = 1; i < count; i++) + { + rtx exp = XVECEXP (op, 0, i); + + if (GET_CODE (exp) != USE + || GET_CODE (XEXP (exp, 0)) != REG + || GET_MODE (XEXP (exp, 0)) != CCmode + || ! CR_REGNO_P (REGNO (XEXP (exp, 0)))) + return 0; + } + return 1; +}) + ;; Return 1 if OP is valid for lmw insn, known to be a PARALLEL. (define_predicate "lmw_operation" (match_code "parallel") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index d58e6865193..667e5a6de18 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -26,6 +26,8 @@ #include "tm.h" #include "cpplib.h" #include "tree.h" +#include "stor-layout.h" +#include "stringpool.h" #include "c-family/c-common.h" #include "c-family/c-pragma.h" #include "diagnostic-core.h" @@ -461,6 +463,10 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile) case ABI_AIX: builtin_define ("_CALL_AIXDESC"); builtin_define ("_CALL_AIX"); + builtin_define ("_CALL_ELF=1"); + break; + case ABI_ELFv2: + builtin_define ("_CALL_ELF=2"); break; case ABI_DARWIN: builtin_define ("_CALL_DARWIN"); @@ -473,6 +479,13 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile) if (TARGET_SOFT_FLOAT || !TARGET_FPRS) builtin_define ("__NO_FPRS__"); + /* Whether aggregates passed by value are aligned to a 16 byte boundary + if their alignment is 16 bytes or larger. */ + if ((TARGET_MACHO && rs6000_darwin64_abi) + || DEFAULT_ABI == ABI_ELFv2 + || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) + builtin_define ("__STRUCT_PARM_ALIGN__=16"); + /* Generate defines for Xilinx FPU. */ if (rs6000_xilinx_fpu) { diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index d528a4fd87a..6d6d69379d5 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -107,7 +107,8 @@ enum group_termination /* Enumeration to give which calling sequence to use. 
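_CALL_ELF is now predefined by rs6000-c.c above (1 for the AIX-style 64-bit ABI, 2 for ELFv2), and the new ppc-asm.h branch keys on exactly that. A minimal consumer-side dispatch; the defined() guard is an extra precaution against -Wundef and is not in the original header:

#if defined (__powerpc64__) && defined (_CALL_ELF) && _CALL_ELF == 2
/* ELFv2: no function descriptors; each function has a global entry
   that derives r2 from r12 and a local entry marked by .localentry,
   as in the FUNC_START above.  */
#elif defined (__powerpc64__)
/* ELFv1: function descriptors; the code entry is the dot-symbol.  */
#endif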
*/ enum rs6000_abi { ABI_NONE, - ABI_AIX, /* IBM's AIX */ + ABI_AIX, /* IBM's AIX, or Linux ELFv1 */ + ABI_ELFv2, /* Linux ELFv2 ABI */ ABI_V4, /* System V.4/eabi */ ABI_DARWIN /* Apple's Darwin (OS X kernel) */ }; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d1d1737dca1..2ad6f08f47d 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -158,6 +158,7 @@ extern tree altivec_resolve_overloaded_builtin (location_t, tree, void *); extern rtx rs6000_libcall_value (enum machine_mode); extern rtx rs6000_va_arg (tree, tree); extern int function_ok_for_sibcall (tree); +extern int rs6000_reg_parm_stack_space (tree); extern void rs6000_elf_declare_function_name (FILE *, const char *, tree); extern bool rs6000_elf_in_small_data_p (const_tree); #ifdef ARGS_SIZE_RTX @@ -182,7 +183,8 @@ extern unsigned int rs6000_dbx_register_number (unsigned int); extern void rs6000_emit_epilogue (int); extern void rs6000_emit_eh_reg_restore (rtx, rtx); extern const char * output_isel (rtx *); -extern void rs6000_call_indirect_aix (rtx, rtx, rtx); +extern void rs6000_call_aix (rtx, rtx, rtx, rtx); +extern void rs6000_sibcall_aix (rtx, rtx, rtx, rtx); extern void rs6000_aix_asm_output_dwarf_table_ref (char *); extern void get_ppc476_thunk_name (char name[32]); extern bool rs6000_overloaded_builtin_p (enum rs6000_builtins); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8c8ee9fae0e..add91c9f055 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -32,6 +32,11 @@ #include "recog.h" #include "obstack.h" #include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "print-tree.h" +#include "varasm.h" #include "expr.h" #include "optabs.h" #include "except.h" @@ -52,6 +57,9 @@ #include "cfgloop.h" #include "sched-int.h" #include "gimple.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" #include "intl.h" #include "params.h" #include "tm-constrs.h" @@ -97,6 +105,7 @@ typedef struct rs6000_stack { int spe_gp_save_offset; /* offset to save spe 64-bit gprs */ int varargs_save_offset; /* offset to save the varargs registers */ int ehrd_offset; /* offset to EH return data */ + int ehcr_offset; /* offset to EH CR field data */ int reg_size; /* register size (4 or 8) */ HOST_WIDE_INT vars_size; /* variable save area size */ int parm_size; /* outgoing parameter size */ @@ -140,6 +149,8 @@ typedef struct GTY(()) machine_function 64-bits wide and is allocated early enough so that the offset does not overflow the 16-bit load/store offset field. */ rtx sdmode_stack_slot; + /* Flag if r2 setup is needed with ELFv2 ABI. */ + bool r2_setup_needed; } machine_function; /* Support targetm.vectorize.builtin_mask_for_load. 
*/ @@ -1446,6 +1457,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB rs6000_return_in_msb + #undef TARGET_SETUP_INCOMING_VARARGS #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs @@ -2234,6 +2248,7 @@ rs6000_debug_reg_global (void) { case ABI_NONE: abi_str = "none"; break; case ABI_AIX: abi_str = "aix"; break; + case ABI_ELFv2: abi_str = "ELFv2"; break; case ABI_V4: abi_str = "V4"; break; case ABI_DARWIN: abi_str = "darwin"; break; default: abi_str = "unknown"; break; @@ -3207,6 +3222,12 @@ rs6000_option_override_internal (bool global_init_p) } } + /* If little-endian, default to -mstrict-align on older processors. + Testing for htm matches power8 and later. */ + if (!BYTES_BIG_ENDIAN + && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM)) + rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN; + /* Add some warnings for VSX. */ if (TARGET_VSX) { @@ -3672,7 +3693,7 @@ rs6000_option_override_internal (bool global_init_p) /* We should always be splitting complex arguments, but we can't break Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */ - if (DEFAULT_ABI != ABI_AIX) + if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) targetm.calls.split_complex_arg = NULL; } @@ -4764,7 +4785,11 @@ rs6000_file_start (void) putc ('\n', file); } - if (DEFAULT_ABI == ABI_AIX || (TARGET_ELF && flag_pic == 2)) + if (DEFAULT_ABI == ABI_ELFv2) + fprintf (file, "\t.abiversion 2\n"); + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 + || (TARGET_ELF && flag_pic == 2)) { switch_to_section (toc_section); switch_to_section (text_section); @@ -6111,10 +6136,10 @@ offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, if (!DECL_SIZE_UNIT (decl)) return false; - if (!host_integerp (DECL_SIZE_UNIT (decl), 1)) + if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl))) return false; - dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1); + dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl)); if (dsize > 32768) return false; @@ -6137,8 +6162,8 @@ offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, if (TREE_CODE (decl) == STRING_CST) dsize = TREE_STRING_LENGTH (decl); else if (TYPE_SIZE_UNIT (type) - && host_integerp (TYPE_SIZE_UNIT (type), 1)) - dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1); + && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))) + dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type)); else return false; if (dsize > 32768) @@ -6387,7 +6412,7 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict) { bool large_toc_ok; - if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic) + if (DEFAULT_ABI == ABI_V4 && flag_pic) return false; /* LRA don't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls push_reload from reload pass code. 
LEGITIMIZE_RELOAD_ADDRESS @@ -6974,10 +6999,13 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model) 1, const0_rtx, Pmode); r3 = gen_rtx_REG (Pmode, 3); - if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT) - insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx); - else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT) - insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx); + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + if (TARGET_64BIT) + insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx); + else + insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx); + } else if (DEFAULT_ABI == ABI_V4) insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx); else @@ -6996,10 +7024,13 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model) 1, const0_rtx, Pmode); r3 = gen_rtx_REG (Pmode, 3); - if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT) - insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx); - else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT) - insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx); + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + if (TARGET_64BIT) + insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx); + else + insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx); + } else if (DEFAULT_ABI == ABI_V4) insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx); else @@ -7626,7 +7657,7 @@ rs6000_conditional_register_usage (void) /* The TOC register is not killed across calls in a way that is visible to the compiler. */ - if (DEFAULT_ABI == ABI_AIX) + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) call_really_used_regs[2] = 0; if (DEFAULT_ABI == ABI_V4 @@ -7958,6 +7989,7 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode) gcc_assert (!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode) && mode != TImode + && !gpr_or_gpr_p (dest, source) && (MEM_P (source) ^ MEM_P (dest))); if (MEM_P (source)) @@ -8168,7 +8200,9 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) } else if (INT_REGNO_P (REGNO (operands[1]))) { - rtx mem = adjust_address_nv (operands[0], mode, 4); + rtx mem = operands[0]; + if (BYTES_BIG_ENDIAN) + mem = adjust_address_nv (mem, mode, 4); mem = eliminate_regs (mem, VOIDmode, NULL_RTX); emit_insn (gen_movsd_hardfloat (mem, operands[1])); } @@ -8191,7 +8225,9 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) } else if (INT_REGNO_P (REGNO (operands[0]))) { - rtx mem = adjust_address_nv (operands[1], mode, 4); + rtx mem = operands[1]; + if (BYTES_BIG_ENDIAN) + mem = adjust_address_nv (mem, mode, 4); mem = eliminate_regs (mem, VOIDmode, NULL_RTX); emit_insn (gen_movsd_hardfloat (operands[0], mem)); } @@ -8437,18 +8473,231 @@ rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode) } /* Nonzero if we can use a floating-point register to pass this arg. */ -#define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \ +#define USE_FP_FOR_ARG_P(CUM,MODE) \ (SCALAR_FLOAT_MODE_P (MODE) \ && (CUM)->fregno <= FP_ARG_MAX_REG \ && TARGET_HARD_FLOAT && TARGET_FPRS) /* Nonzero if we can use an AltiVec register to pass this arg. */ -#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \ +#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \ (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \ && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \ && TARGET_ALTIVEC_ABI \ && (NAMED)) +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + or vector type. 
If a non-floating point or vector type is found, or + if a floating point or vector type that doesn't match a non-VOIDmode + *MODEP is found, then return -1, otherwise return the count in the + sub-tree. */ + +static int +rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep) +{ + enum machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (!SCALAR_FLOAT_MODE_P (mode)) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (!SCALAR_FLOAT_MODE_P (mode)) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC) + return -1; + + /* Use V4SImode as representative of all 128-bit vector types. */ + size = int_size_in_bytes (type); + switch (size) + { + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + count = rs6000_aggregate_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) + || !TYPE_MIN_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) + || count < 0) + return -1; + + count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index))); + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. 
*/ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* If an argument, whose type is described by TYPE and MODE, is a homogeneous + float or vector aggregate that shall be passed in FP/vector registers + according to the ELFv2 ABI, return the homogeneous element mode in + *ELT_MODE and the number of elements in *N_ELTS, and return TRUE. + + Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */ + +static bool +rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type, + enum machine_mode *elt_mode, + int *n_elts) +{ + /* Note that we do not accept complex types at the top level as + homogeneous aggregates; these types are handled via the + targetm.calls.split_complex_arg mechanism. Complex types + can be elements of homogeneous aggregates, however. */ + if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type)) + { + enum machine_mode field_mode = VOIDmode; + int field_count = rs6000_aggregate_candidate (type, &field_mode); + + if (field_count > 0) + { + int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)? + (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1); + + /* The ELFv2 ABI allows homogeneous aggregates to occupy + up to AGGR_ARG_NUM_REG registers. */ + if (field_count * n_regs <= AGGR_ARG_NUM_REG) + { + if (elt_mode) + *elt_mode = field_mode; + if (n_elts) + *n_elts = field_count; + return true; + } + } + } + + if (elt_mode) + *elt_mode = mode; + if (n_elts) + *n_elts = 1; + return false; +} + /* Return a nonzero value to say to return the function value in memory, just as large structures are always returned. TYPE will be the data type of the value, and FNTYPE will be the type of the @@ -8501,6 +8750,16 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) /* Otherwise fall through to more conventional ABI rules. */ } + /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */ + if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type, + NULL, NULL)) + return false; + + /* The ELFv2 ABI returns aggregates up to 16B in registers */ + if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type) + && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16) + return false; + if (AGGREGATE_TYPE_P (type) && (aix_struct_return || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)) @@ -8532,6 +8791,19 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) return false; } +/* Specify whether values returned in registers should be at the most + significant end of a register. We want aggregates returned by + value to match the way aggregates are passed to functions. */ + +static bool +rs6000_return_in_msb (const_tree valtype) +{ + return (DEFAULT_ABI == ABI_ELFv2 + && BYTES_BIG_ENDIAN + && AGGREGATE_TYPE_P (valtype) + && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward); +} + #ifdef HAVE_AS_GNU_ATTRIBUTE /* Return TRUE if a call to function FNDECL may be one that potentially affects the function calling ABI of the object file. 
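Concrete cases for rs6000_discover_homogeneous_aggregate above, assuming the AGGR_ARG_NUM_REG limit admits eight like elements (the exact limit is taken on trust here) and a PowerPC compiler with AltiVec enabled for the vector member:

struct hfa { double a, b, c, d; };  /* four DFmode elements: accepted */
struct hva { vector float x, y; };  /* two 16-byte vectors: accepted */
struct mix { double a; float b; };  /* element modes differ: rejected */
struct cpx { _Complex double z; };  /* complex is fine as an element
                                       and counts as two DFmode values */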
*/ @@ -8668,7 +8940,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, static bool rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type) { - if (DEFAULT_ABI == ABI_AIX || TARGET_64BIT) + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT) return must_pass_in_stack_var_size (mode, type); else return must_pass_in_stack_var_size_or_pad (mode, type); @@ -8749,6 +9021,11 @@ function_arg_padding (enum machine_mode mode, const_tree type) static unsigned int rs6000_function_arg_boundary (enum machine_mode mode, const_tree type) { + enum machine_mode elt_mode; + int n_elts; + + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); + if (DEFAULT_ABI == ABI_V4 && (GET_MODE_SIZE (mode) == 8 || (TARGET_HARD_FLOAT @@ -8760,11 +9037,12 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type) && int_size_in_bytes (type) >= 8 && int_size_in_bytes (type) < 16)) return 64; - else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) + else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode) || (type && TREE_CODE (type) == VECTOR_TYPE && int_size_in_bytes (type) >= 16)) return 128; else if (((TARGET_MACHO && rs6000_darwin64_abi) + || DEFAULT_ABI == ABI_ELFv2 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) && mode == BLKmode && type && TYPE_ALIGN (type) > 64) @@ -8773,6 +9051,16 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type) return PARM_BOUNDARY; } +/* The offset in words to the start of the parameter save area. */ + +static unsigned int +rs6000_parm_offset (void) +{ + return (DEFAULT_ABI == ABI_V4 ? 2 + : DEFAULT_ABI == ABI_ELFv2 ? 4 + : 6); +} + /* For a function parm of MODE and TYPE, return the starting word in the parameter area. NWORDS of the parameter area are already used. */ @@ -8781,11 +9069,9 @@ rs6000_parm_start (enum machine_mode mode, const_tree type, unsigned int nwords) { unsigned int align; - unsigned int parm_offset; align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1; - parm_offset = DEFAULT_ABI == ABI_V4 ? 2 : 6; - return nwords + (-(parm_offset + nwords) & align); + return nwords + (-(rs6000_parm_offset () + nwords) & align); } /* Compute the size (in words) of a function argument. */ @@ -8885,14 +9171,14 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum, mode = TYPE_MODE (ftype); if (DECL_SIZE (f) != 0 - && host_integerp (bit_position (f), 1)) + && tree_fits_uhwi_p (bit_position (f))) bitpos += int_bit_position (f); /* ??? FIXME: else assume zero offset. */ if (TREE_CODE (ftype) == RECORD_TYPE) rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos); - else if (USE_FP_FOR_ARG_P (cum, mode, ftype)) + else if (USE_FP_FOR_ARG_P (cum, mode)) { unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3; rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); @@ -8933,7 +9219,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum, else cum->words += n_fpregs; } - else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1)) + else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1)) { rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); cum->vregno++; @@ -8970,6 +9256,11 @@ static void rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode, const_tree type, bool named, int depth) { + enum machine_mode elt_mode; + int n_elts; + + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); + /* Only tick off an argument if we're not recursing. 
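rs6000_parm_offset above centralises the parameter save area's start: 2 words for V.4, 4 for ELFv2, 6 for AIX/ELFv1. rs6000_parm_start then aligns an argument against that absolute base rather than against the area's origin. The arithmetic, standalone, with a worked 32-bit example:

/* ALIGN is boundary/PARM_BOUNDARY - 1; the result is the first word
   within the save area whose absolute word offset (parm_offset +
   result) is a multiple of align + 1.  */
static unsigned int
parm_start (unsigned int parm_offset, unsigned int nwords,
            unsigned int align)
{
  return nwords + (-(parm_offset + nwords) & align);
}

/* 32-bit V.4 (offset 2), three words already used, 16-byte alignment
   (four 32-bit words, align == 3): parm_start (2, 3, 3) == 6, and the
   argument starts at absolute word 2 + 6 == 8, a multiple of 4.  */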
*/ if (depth == 0) cum->nargs_prototype--; @@ -8990,15 +9281,16 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode, #endif if (TARGET_ALTIVEC_ABI - && (ALTIVEC_OR_VSX_VECTOR_MODE (mode) + && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode) || (type && TREE_CODE (type) == VECTOR_TYPE && int_size_in_bytes (type) == 16))) { bool stack = false; - if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)) + if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) { - cum->vregno++; + cum->vregno += n_elts; + if (!TARGET_ALTIVEC) error ("cannot pass argument in vector register because" " altivec instructions are disabled, use -maltivec" @@ -9007,7 +9299,8 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode, /* PowerPC64 Linux and AIX allocate GPRs for a vector argument even if it is going to be passed in a vector register. Darwin does the same for variable-argument functions. */ - if ((DEFAULT_ABI == ABI_AIX && TARGET_64BIT) + if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && TARGET_64BIT) || (cum->stdarg && DEFAULT_ABI != ABI_V4)) stack = true; } @@ -9018,15 +9311,13 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode, { int align; - /* Vector parameters must be 16-byte aligned. This places - them at 2 mod 4 in terms of words in 32-bit mode, since - the parameter save area starts at offset 24 from the - stack. In 64-bit mode, they just have to start on an - even word, since the parameter save area is 16-byte - aligned. Space for GPRs is reserved even if the argument - will be passed in memory. */ + /* Vector parameters must be 16-byte aligned. In 32-bit + mode this means we need to take into account the offset + to the parameter save area. In 64-bit mode, they just + have to start on an even word, since the parameter save + area is 16-byte aligned. */ if (TARGET_32BIT) - align = (2 - cum->words) & 3; + align = -(rs6000_parm_offset () + cum->words) & 3; else align = cum->words & 1; cum->words += align + rs6000_arg_size (mode, type); @@ -9151,15 +9442,15 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode, cum->words = align_words + n_words; - if (SCALAR_FLOAT_MODE_P (mode) + if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS) { /* _Decimal128 must be passed in an even/odd float register pair. This assumes that the register number is odd when fregno is odd. */ - if (mode == TDmode && (cum->fregno % 2) == 1) + if (elt_mode == TDmode && (cum->fregno % 2) == 1) cum->fregno++; - cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3; + cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3); } if (TARGET_DEBUG_ARG) @@ -9362,14 +9653,14 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type, mode = TYPE_MODE (ftype); if (DECL_SIZE (f) != 0 - && host_integerp (bit_position (f), 1)) + && tree_fits_uhwi_p (bit_position (f))) bitpos += int_bit_position (f); /* ??? FIXME: else assume zero offset. 
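The rewritten 32-bit alignment expression is equivalent to the old hard-coded (2 - words) & 3 for both offsets the 32-bit path can actually see, since 2 (V.4) and 6 (AIX) are both 2 mod 4; ELFv2 never reaches this branch because the patch makes it 64-bit only (note the gcc_assert (!TARGET_32BIT) in the trampoline hunk further down). A quick standalone check of the equivalence:

#include <stdio.h>

static int old_align (int words) { return (2 - words) & 3; }
static int new_align (int offset_words, int words)
{ return -(offset_words + words) & 3; }

int
main (void)
{
  /* Padding in words before a 16-byte-aligned vector argument.  The
     columns for V.4 (offset 2) and 32-bit AIX (offset 6) match the
     old formula for every starting position.  */
  for (int w = 0; w < 8; w++)
    printf ("words=%d  old=%d  v4=%d  aix=%d\n",
            w, old_align (w), new_align (2, w), new_align (6, w));
  return 0;
}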
*/ if (TREE_CODE (ftype) == RECORD_TYPE) rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k); - else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype)) + else if (cum->named && USE_FP_FOR_ARG_P (cum, mode)) { unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3; #if 0 @@ -9397,7 +9688,7 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type, if (mode == TFmode || mode == TDmode) cum->fregno++; } - else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1)) + else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1)) { rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); rvec[(*k)++] @@ -9514,6 +9805,84 @@ rs6000_mixed_function_arg (enum machine_mode mode, const_tree type, return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); } +/* We have an argument of MODE and TYPE that goes into FPRs or VRs, + but must also be copied into the parameter save area starting at + offset ALIGN_WORDS. Fill in RVEC with the elements corresponding + to the GPRs and/or memory. Return the number of elements used. */ + +static int +rs6000_psave_function_arg (enum machine_mode mode, const_tree type, + int align_words, rtx *rvec) +{ + int k = 0; + + if (align_words < GP_ARG_NUM_REG) + { + int n_words = rs6000_arg_size (mode, type); + + if (align_words + n_words > GP_ARG_NUM_REG + || mode == BLKmode + || (TARGET_32BIT && TARGET_POWERPC64)) + { + /* If this is partially on the stack, then we only + include the portion actually in registers here. */ + enum machine_mode rmode = TARGET_32BIT ? SImode : DImode; + int i = 0; + + if (align_words + n_words > GP_ARG_NUM_REG) + { + /* Not all of the arg fits in gprs. Say that it goes in memory + too, using a magic NULL_RTX component. Also see comment in + rs6000_mixed_function_arg for why the normal + function_arg_partial_nregs scheme doesn't work in this case. */ + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + } + + do + { + rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words); + rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode)); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + while (++align_words < GP_ARG_NUM_REG && --n_words != 0); + } + else + { + /* The whole arg fits in gprs. */ + rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx); + } + } + else + { + /* It's entirely in memory. */ + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + } + + return k; +} + +/* RVEC is a vector of K components of an argument of mode MODE. + Construct the final function_arg return value from it. */ + +static rtx +rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k) +{ + gcc_assert (k >= 1); + + /* Avoid returning a PARALLEL in the trivial cases. */ + if (k == 1) + { + if (XEXP (rvec[0], 0) == NULL_RTX) + return NULL_RTX; + + if (GET_MODE (XEXP (rvec[0], 0)) == mode) + return XEXP (rvec[0], 0); + } + + return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); +} + /* Determine where to put an argument to a function. Value is zero to push the argument on the stack, or a hard register in which to store the argument. @@ -9548,6 +9917,8 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); enum rs6000_abi abi = DEFAULT_ABI; + enum machine_mode elt_mode; + int n_elts; /* Return a marker to indicate whether CR1 needs to set or clear the bit that V.4 uses to say fp args were passed in registers. 
@@ -9574,6 +9945,8 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, return GEN_INT (cum->call_cookie & ~CALL_LIBCALL); } + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); + if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) { rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false); @@ -9582,33 +9955,30 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, /* Else fall through to usual handling. */ } - if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)) - if (TARGET_64BIT && ! cum->prototype) - { - /* Vector parameters get passed in vector register - and also in GPRs or memory, in absence of prototype. */ - int align_words; - rtx slot; - align_words = (cum->words + 1) & ~1; + if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) + { + rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; + rtx r, off; + int i, k = 0; - if (align_words >= GP_ARG_NUM_REG) - { - slot = NULL_RTX; - } - else - { - slot = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); - } - return gen_rtx_PARALLEL (mode, - gen_rtvec (2, - gen_rtx_EXPR_LIST (VOIDmode, - slot, const0_rtx), - gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (mode, cum->vregno), - const0_rtx))); - } - else - return gen_rtx_REG (mode, cum->vregno); + /* Do we also need to pass this argument in the parameter + save area? */ + if (TARGET_64BIT && ! cum->prototype) + { + int align_words = (cum->words + 1) & ~1; + k = rs6000_psave_function_arg (mode, type, align_words, rvec); + } + + /* Describe where this argument goes in the vector registers. */ + for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++) + { + r = gen_rtx_REG (elt_mode, cum->vregno + i); + off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + + return rs6000_finish_function_arg (mode, rvec, k); + } else if (TARGET_ALTIVEC_ABI && (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || (type && TREE_CODE (type) == VECTOR_TYPE @@ -9623,13 +9993,13 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, int align, align_words, n_words; enum machine_mode part_mode; - /* Vector parameters must be 16-byte aligned. This places them at - 2 mod 4 in terms of words in 32-bit mode, since the parameter - save area starts at offset 24 from the stack. In 64-bit mode, - they just have to start on an even word, since the parameter - save area is 16-byte aligned. */ + /* Vector parameters must be 16-byte aligned. In 32-bit + mode this means we need to take into account the offset + to the parameter save area. In 64-bit mode, they just + have to start on an even word, since the parameter save + area is 16-byte aligned. */ if (TARGET_32BIT) - align = (2 - cum->words) & 3; + align = -(rs6000_parm_offset () + cum->words) & 3; else align = cum->words & 1; align_words = cum->words + align; @@ -9707,101 +10077,50 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode, /* _Decimal128 must be passed in an even/odd float register pair. This assumes that the register number is odd when fregno is odd. 
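For a homogeneous vector aggregate, the loop just added to rs6000_function_arg hands out consecutive VRs with a 16-byte offset per element. The following standalone sketch (not from the patch) uses the ABI register names v2..v13 rather than GCC's internal register numbers; the starting register and element count are made-up example values.

#include <stdio.h>

#define ALTIVEC_ARG_MIN_REG 2                          /* v2 */
#define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11) /* v13 */

int
main (void)
{
  int n_elts = 4;    /* e.g. struct { vector int v[4]; } */
  int vregno = 12;   /* first free VR; v2..v11 already taken */
  int i;

  for (i = 0; i < n_elts && vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
    printf ("element %d -> v%d, offset %d\n", i, vregno + i, i * 16);
  printf ("%d of %d elements in VRs; the rest come from memory\n",
          i, n_elts);
  return 0;
}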
*/ - if (mode == TDmode && (cum->fregno % 2) == 1) + if (elt_mode == TDmode && (cum->fregno % 2) == 1) cum->fregno++; - if (USE_FP_FOR_ARG_P (cum, mode, type)) + if (USE_FP_FOR_ARG_P (cum, elt_mode)) { - rtx rvec[GP_ARG_NUM_REG + 1]; - rtx r; - int k; - bool needs_psave; - enum machine_mode fmode = mode; - unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3; + rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; + rtx r, off; + int i, k = 0; + unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; - if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1) - { - /* Currently, we only ever need one reg here because complex - doubles are split. */ - gcc_assert (cum->fregno == FP_ARG_MAX_REG - && (fmode == TFmode || fmode == TDmode)); - - /* Long double or _Decimal128 split over regs and memory. */ - fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode; - } - - /* Do we also need to pass this arg in the parameter save - area? */ - needs_psave = (type - && (cum->nargs_prototype <= 0 - || (DEFAULT_ABI == ABI_AIX - && TARGET_XL_COMPAT - && align_words >= GP_ARG_NUM_REG))); + /* Do we also need to pass this argument in the parameter + save area? */ + if (type && (cum->nargs_prototype <= 0 + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && TARGET_XL_COMPAT + && align_words >= GP_ARG_NUM_REG))) + k = rs6000_psave_function_arg (mode, type, align_words, rvec); - if (!needs_psave && mode == fmode) - return gen_rtx_REG (fmode, cum->fregno); - - k = 0; - if (needs_psave) + /* Describe where this argument goes in the fprs. */ + for (i = 0; i < n_elts + && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++) { - /* Describe the part that goes in gprs or the stack. - This piece must come first, before the fprs. */ - if (align_words < GP_ARG_NUM_REG) + /* Check if the argument is split over registers and memory. + This can only ever happen for long double or _Decimal128; + complex types are handled via split_complex_arg. */ + enum machine_mode fmode = elt_mode; + if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1) { - unsigned long n_words = rs6000_arg_size (mode, type); - - if (align_words + n_words > GP_ARG_NUM_REG - || (TARGET_32BIT && TARGET_POWERPC64)) - { - /* If this is partially on the stack, then we only - include the portion actually in registers here. */ - enum machine_mode rmode = TARGET_32BIT ? SImode : DImode; - rtx off; - int i = 0; - if (align_words + n_words > GP_ARG_NUM_REG) - /* Not all of the arg fits in gprs. Say that it - goes in memory too, using a magic NULL_RTX - component. Also see comment in - rs6000_mixed_function_arg for why the normal - function_arg_partial_nregs scheme doesn't work - in this case. */ - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, - const0_rtx); - do - { - r = gen_rtx_REG (rmode, - GP_ARG_MIN_REG + align_words); - off = GEN_INT (i++ * GET_MODE_SIZE (rmode)); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); - } - while (++align_words < GP_ARG_NUM_REG && --n_words != 0); - } - else - { - /* The whole arg fits in gprs. */ - r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx); - } + gcc_assert (fmode == TFmode || fmode == TDmode); + fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode; } - else - /* It's entirely in memory. */ - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); - } - /* Describe where this piece goes in the fprs. 
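The FPR loop below works the same way, except that one element can span several FPRs (n_fpreg) and a trailing element that no longer fits whole is demoted from TFmode/TDmode to DFmode/DDmode so that only its in-register half is described. A standalone sketch with ABI names f1..f13, again with illustrative inputs:

#include <stdio.h>

#define FIRST_FP_ARG 1   /* f1;  FP_ARG_MIN_REG internally */
#define LAST_FP_ARG 13   /* f13; FP_ARG_AIX_MAX_REG internally */

int
main (void)
{
  int n_elts = 8;                      /* struct { double x[8]; } */
  int elt_bytes = 8;
  int n_fpreg = (elt_bytes + 7) / 8;   /* FPRs per element */
  int fregno = 7;                      /* first free FPR: f7 */
  int i;

  for (i = 0; i < n_elts && fregno + i * n_fpreg <= LAST_FP_ARG; i++)
    printf ("element %d -> f%d, offset %d\n",
            i, fregno + i * n_fpreg, i * elt_bytes);
  printf ("%d of %d elements in FPRs\n", i, n_elts);
  return 0;
}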
*/ - r = gen_rtx_REG (fmode, cum->fregno); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx); + r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg); + off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } - return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); + return rs6000_finish_function_arg (mode, rvec, k); } else if (align_words < GP_ARG_NUM_REG) { if (TARGET_32BIT && TARGET_POWERPC64) return rs6000_mixed_function_arg (mode, type, align_words); - if (mode == BLKmode) - mode = Pmode; - return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); } else @@ -9820,15 +10139,31 @@ rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode, tree type, bool named) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + bool passed_in_gprs = true; int ret = 0; int align_words; + enum machine_mode elt_mode; + int n_elts; + + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); if (DEFAULT_ABI == ABI_V4) return 0; - if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named) - && cum->nargs_prototype >= 0) - return 0; + if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) + { + /* If we are passing this arg in the fixed parameter save area + (gprs or memory) as well as VRs, we do not use the partial + bytes mechanism; instead, rs6000_function_arg will return a + PARALLEL including a memory element as necessary. */ + if (TARGET_64BIT && ! cum->prototype) + return 0; + + /* Otherwise, we pass in VRs only. Check for partial copies. */ + passed_in_gprs = false; + if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1) + ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16; + } /* In this complicated case we just disable the partial_nregs code. */ if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) @@ -9836,26 +10171,30 @@ rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode, align_words = rs6000_parm_start (mode, type, cum->words); - if (USE_FP_FOR_ARG_P (cum, mode, type)) + if (USE_FP_FOR_ARG_P (cum, elt_mode)) { + unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; + /* If we are passing this arg in the fixed parameter save area - (gprs or memory) as well as fprs, then this function should - return the number of partial bytes passed in the parameter - save area rather than partial bytes passed in fprs. */ + (gprs or memory) as well as FPRs, we do not use the partial + bytes mechanism; instead, rs6000_function_arg will return a + PARALLEL including a memory element as necessary. */ if (type && (cum->nargs_prototype <= 0 - || (DEFAULT_ABI == ABI_AIX + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_XL_COMPAT && align_words >= GP_ARG_NUM_REG))) return 0; - else if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3) - > FP_ARG_MAX_REG + 1) - ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8; - else if (cum->nargs_prototype >= 0) - return 0; + + /* Otherwise, we pass in FPRs only. Check for partial copies. */ + passed_in_gprs = false; + if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1) + ret = ((FP_ARG_MAX_REG + 1 - cum->fregno) + * MIN (8, GET_MODE_SIZE (elt_mode))); } - if (align_words < GP_ARG_NUM_REG + if (passed_in_gprs + && align_words < GP_ARG_NUM_REG && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type)) ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8); @@ -9936,6 +10275,139 @@ rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, return 0; }
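Continuing the f7 example from above: rs6000_arg_partial_bytes now reports how many bytes of such a split homogeneous aggregate travel in registers, and the MIN (8, ...) term keeps 16-byte elements from being over-counted per FPR. A standalone sketch of that computation:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int
main (void)
{
  int first_free_fpr = 7, last_fpr = 13;  /* f7 .. f13 still available */
  int n_elts = 8, elt_bytes = 8;          /* struct { double x[8]; } */
  int n_fpreg = (elt_bytes + 7) / 8;
  int in_regs = 0;

  if (first_free_fpr + n_elts * n_fpreg > last_fpr + 1)
    in_regs = (last_fpr + 1 - first_free_fpr) * MIN (8, elt_bytes);
  printf ("bytes of the argument passed in FPRs: %d\n", in_regs);  /* 56 */
  return 0;
}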
+/* Process parameter of type TYPE after ARGS_SO_FAR parameters were + already processed. Return true if the parameter must be passed + (fully or partially) on the stack. */ + +static bool +rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type) +{ + enum machine_mode mode; + int unsignedp; + rtx entry_parm; + + /* Catch errors. */ + if (type == NULL || type == error_mark_node) + return true; + + /* Handle types with no storage requirement. */ + if (TYPE_MODE (type) == VOIDmode) + return false; + + /* Handle complex types. */ + if (TREE_CODE (type) == COMPLEX_TYPE) + return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)) + || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))); + + /* Handle transparent aggregates. */ + if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE) + && TYPE_TRANSPARENT_AGGR (type)) + type = TREE_TYPE (first_field (type)); + + /* See if this arg was passed by invisible reference. */ + if (pass_by_reference (get_cumulative_args (args_so_far), + TYPE_MODE (type), type, true)) + type = build_pointer_type (type); + + /* Find mode as it is passed by the ABI. */ + unsignedp = TYPE_UNSIGNED (type); + mode = promote_mode (type, TYPE_MODE (type), &unsignedp); + + /* If we must pass in stack, we need a stack. */ + if (rs6000_must_pass_in_stack (mode, type)) + return true; + + /* If there is no incoming register, we need a stack. */ + entry_parm = rs6000_function_arg (args_so_far, mode, type, true); + if (entry_parm == NULL) + return true; + + /* Likewise if we need to pass both in registers and on the stack. */ + if (GET_CODE (entry_parm) == PARALLEL + && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX) + return true; + + /* Also true if we're partially in registers and partially not. */ + if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0) + return true; + + /* Update info on where next arg arrives in registers. */ + rs6000_function_arg_advance (args_so_far, mode, type, true); + return false; +} + +/* Return true if FUN has no prototype, has a variable argument + list, or passes any parameter in memory. */ + +static bool +rs6000_function_parms_need_stack (tree fun) +{ + function_args_iterator args_iter; + tree arg_type; + CUMULATIVE_ARGS args_so_far_v; + cumulative_args_t args_so_far; + + if (!fun) + /* Must be a libcall, all of which only use reg parms. */ + return false; + if (!TYPE_P (fun)) + fun = TREE_TYPE (fun); + + /* Varargs functions need the parameter save area. */ + if (!prototype_p (fun) || stdarg_p (fun)) + return true; + + INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX); + args_so_far = pack_cumulative_args (&args_so_far_v); + + if (aggregate_value_p (TREE_TYPE (fun), fun)) + { + tree type = build_pointer_type (TREE_TYPE (fun)); + rs6000_parm_needs_stack (args_so_far, type); + } + + FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter) + if (rs6000_parm_needs_stack (args_so_far, arg_type)) + return true; + + return false; +} + +/* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is + usually a constant depending on the ABI. However, in the ELFv2 ABI + the register parameter area is optional when calling a function that + has a prototype in scope, has no variable argument list, and passes + all parameters in registers. */ + +int +rs6000_reg_parm_stack_space (tree fun) +{ + int reg_parm_stack_space; + + switch (DEFAULT_ABI) + { + default: + reg_parm_stack_space = 0; + break; + + case ABI_AIX: + case ABI_DARWIN: + reg_parm_stack_space = TARGET_64BIT ? 64 : 32; + break; + + case ABI_ELFv2: + /* ??? Recomputing this every time is a bit expensive. 
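In effect the three functions above answer one question: does any parameter (or the hidden aggregate-return pointer) touch memory? Only then does ELFv2 reserve the 64-byte register parameter area. A condensed standalone sketch of the decision, with the inputs abstracted to booleans:

#include <stdio.h>
#include <stdbool.h>

/* ELFv2, 64-bit: the register parameter save area is optional.  */
static int
reg_parm_stack_space (bool prototyped, bool varargs, bool parm_in_memory)
{
  if (!prototyped || varargs || parm_in_memory)
    return 64;
  return 0;
}

int
main (void)
{
  printf ("int f (int, int)          -> %d bytes\n",
          reg_parm_stack_space (true, false, false));
  printf ("int g (const char *, ...) -> %d bytes\n",
          reg_parm_stack_space (true, true, false));
  printf ("big struct by value       -> %d bytes\n",
          reg_parm_stack_space (true, false, true));
  return 0;
}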
Is there + a place to cache this information? */ + if (rs6000_function_parms_need_stack (fun)) + reg_parm_stack_space = TARGET_64BIT ? 64 : 32; + else + reg_parm_stack_space = 0; + break; + } + + return reg_parm_stack_space; +} + static void rs6000_move_block_from_reg (int regno, rtx x, int nregs) { @@ -10315,6 +10787,7 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, if (((TARGET_MACHO && rs6000_darwin64_abi) + || DEFAULT_ABI == ABI_ELFv2 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) && integer_zerop (TYPE_SIZE (type))) { @@ -11925,8 +12398,8 @@ get_element_number (tree vec_type, tree arg) { unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; - if (!host_integerp (arg, 1) - || (elt = tree_low_cst (arg, 1), elt > max)) + if (!tree_fits_uhwi_p (arg) + || (elt = tree_to_uhwi (arg), elt > max)) { error ("selector must be an integer constant in the range 0..%wi", max); return 0; @@ -16203,6 +16676,13 @@ rs6000_cannot_change_mode_class (enum machine_mode from, if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode)) return true; + /* TDmode in floating-mode registers must always go into a register + pair with the most significant word in the even-numbered register + to match ISA requirements. In little-endian mode, this does not + match subreg numbering, so we cannot allow subregs. */ + if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode)) + return true; + if (from_size < 8 || to_size < 8) return true; @@ -16645,6 +17125,7 @@ rs6000_output_function_entry (FILE *file, const char *fname) ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L."); break; + case ABI_ELFv2: case ABI_V4: case ABI_DARWIN: break; @@ -19144,6 +19625,39 @@ rs6000_split_multireg_move (rtx dst, rtx src) gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); + /* TDmode residing in FP registers is special, since the ISA requires that + the lower-numbered word of a register pair is always the most significant + word, even in little-endian mode. This does not match the usual subreg + semantics, so we cannnot use simplify_gen_subreg in those cases. Access + the appropriate constituent registers "by hand" in little-endian mode. + + Note we do not need to check for destructive overlap here since TDmode + can only reside in even/odd register pairs. */ + if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) + { + rtx p_src, p_dst; + int i; + + for (i = 0; i < nregs; i++) + { + if (REG_P (src) && FP_REGNO_P (REGNO (src))) + p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); + else + p_src = simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size); + + if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) + p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); + else + p_dst = simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size); + + emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src)); + } + + return; + } + if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) { /* Move register range backwards, if we might have destructive @@ -19597,7 +20111,7 @@ rs6000_savres_strategy (rs6000_stack_t *info, } else { - gcc_checking_assert (DEFAULT_ABI == ABI_AIX); + gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); if (info->first_fp_reg_save > 61) strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; @@ -19608,7 +20122,8 @@ rs6000_savres_strategy (rs6000_stack_t *info, by the static chain. It would require too much fiddling and the static chain is rarely used anyway. 
FPRs are saved w.r.t the stack pointer on Darwin, and AIX uses r1 or r12. */ - if (using_static_chain_p && DEFAULT_ABI != ABI_AIX) + if (using_static_chain_p + && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)) strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS) | SAVE_INLINE_GPRS | SAVE_INLINE_VRS | REST_INLINE_VRS); @@ -19741,6 +20256,34 @@ rs6000_savres_strategy (rs6000_stack_t *info, The required alignment for AIX configurations is two words (i.e., 8 or 16 bytes). + The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like: + + SP----> +---------------------------------------+ + | Back chain to caller | 0 + +---------------------------------------+ + | Save area for CR | 8 + +---------------------------------------+ + | Saved LR | 16 + +---------------------------------------+ + | Saved TOC pointer | 24 + +---------------------------------------+ + | Parameter save area (P) | 32 + +---------------------------------------+ + | Alloca space (A) | 32+P + +---------------------------------------+ + | Local variable space (L) | 32+P+A + +---------------------------------------+ + | Save area for AltiVec registers (W) | 32+P+A+L + +---------------------------------------+ + | AltiVec alignment padding (Y) | 32+P+A+L+W + +---------------------------------------+ + | Save area for GP registers (G) | 32+P+A+L+W+Y + +---------------------------------------+ + | Save area for FP registers (F) | 32+P+A+L+W+Y+G + +---------------------------------------+ + old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F + +---------------------------------------+ + V.4 stack frames look like: @@ -19801,6 +20344,7 @@ rs6000_stack_info (void) rs6000_stack_t *info_ptr = &stack_info; int reg_size = TARGET_32BIT ? 4 : 8; int ehrd_size; + int ehcr_size; int save_align; int first_gp; HOST_WIDE_INT non_fixed_size; @@ -19894,6 +20438,18 @@ rs6000_stack_info (void) else ehrd_size = 0; + /* In the ELFv2 ABI, we also need to allocate space for separate + CR field save areas if the function calls __builtin_eh_return. */ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + /* This hard-codes that we have three call-saved CR fields. */ + ehcr_size = 3 * reg_size; + /* We do *not* use the regular CR save mechanism. */ + info_ptr->cr_save_p = 0; + } + else + ehcr_size = 0; + /* Determine various sizes. */ info_ptr->reg_size = reg_size; info_ptr->fixed_size = RS6000_SAVE_AREA; @@ -19933,6 +20489,7 @@ rs6000_stack_info (void) gcc_unreachable (); case ABI_AIX: + case ABI_ELFv2: case ABI_DARWIN: info_ptr->fp_save_offset = - info_ptr->fp_size; info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size; @@ -19962,6 +20519,8 @@ rs6000_stack_info (void) } else info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size; + + info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size; info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */ info_ptr->lr_save_offset = 2*reg_size; break; @@ -20024,6 +20583,7 @@ rs6000_stack_info (void) + info_ptr->spe_gp_size + info_ptr->spe_padding_size + ehrd_size + + ehcr_size + info_ptr->cr_size + info_ptr->vrsave_size, save_align); @@ -20037,7 +20597,7 @@ rs6000_stack_info (void) /* Determine if we need to save the link register. 
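The numbers in the layout comment above follow from the shrunken ELFv2 header: back chain, CR, LR and TOC words only, so the parameter save area starts at byte 32 instead of AIX's 48. The standalone sketch below (not from the patch) encodes those fixed 64-bit offsets; the ehcr figure assumes the patch's hard-coded three call-saved CR fields.

#include <stdio.h>

/* ELFv2 fixed frame header, 64-bit.  RS6000_SAVE_AREA yields
   (V.4 ? 8 : ELFv2 ? 16 : 24) << 1 = 32 bytes here, 48 for AIX.  */
struct elfv2_header {
  long back_chain;  /* offset  0 */
  long cr_save;     /* offset  8 */
  long lr_save;     /* offset 16 */
  long toc_save;    /* offset 24, RS6000_TOC_SAVE_SLOT */
};

int
main (void)
{
  printf ("header: %zu bytes; parameter save area starts there\n",
          sizeof (struct elfv2_header));
  printf ("ehcr_size for __builtin_eh_return: %d bytes\n", 3 * 8);
  return 0;
}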
*/ if (info_ptr->calls_p - || (DEFAULT_ABI == ABI_AIX + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && crtl->profile && !TARGET_PROFILE_KERNEL) || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca) @@ -20183,6 +20743,7 @@ debug_stack_info (rs6000_stack_t *info) default: abi_string = "Unknown"; break; case ABI_NONE: abi_string = "NONE"; break; case ABI_AIX: abi_string = "AIX"; break; + case ABI_ELFv2: abi_string = "ELFv2"; break; case ABI_DARWIN: abi_string = "Darwin"; break; case ABI_V4: abi_string = "V.4"; break; } @@ -20304,7 +20865,8 @@ rs6000_return_addr (int count, rtx frame) /* Currently we don't optimize very well between prolog and body code and for PIC code the code can be actually quite bad, so don't try to be too clever here. */ - if (count != 0 || (DEFAULT_ABI != ABI_AIX && flag_pic)) + if (count != 0 + || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic)) { cfun->machine->ra_needs_full_frame = 1; @@ -20363,13 +20925,13 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp) return false; } - /* Under the AIX ABI we can't allow calls to non-local functions, - because the callee may have a different TOC pointer to the - caller and there's no way to ensure we restore the TOC when we - return. With the secure-plt SYSV ABI we can't make non-local + /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local + functions, because the callee may have a different TOC pointer to + the caller and there's no way to ensure we restore the TOC when + we return. With the secure-plt SYSV ABI we can't make non-local calls when -fpic/PIC because the plt call stubs use r30. */ if (DEFAULT_ABI == ABI_DARWIN - || (DEFAULT_ABI == ABI_AIX + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && decl && !DECL_EXTERNAL (decl) && (*targetm.binds_local_p) (decl)) @@ -20452,7 +21014,7 @@ rs6000_emit_load_toc_table (int fromprolog) rtx dest; dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); - if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic) + if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic) { char buf[30]; rtx lab, tmp1, tmp2, got; @@ -20480,7 +21042,7 @@ rs6000_emit_load_toc_table (int fromprolog) emit_insn (gen_load_toc_v4_pic_si ()); emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO)); } - else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2) + else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2) { char buf[30]; rtx temp0 = (fromprolog @@ -20528,7 +21090,7 @@ rs6000_emit_load_toc_table (int fromprolog) } else { - gcc_assert (DEFAULT_ABI == ABI_AIX); + gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); if (TARGET_32BIT) emit_insn (gen_load_toc_aix_si (dest)); @@ -20933,7 +21495,7 @@ output_probe_stack_range (rtx reg1, rtx reg2) static rtx rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val, - rtx reg2, rtx rreg) + rtx reg2, rtx rreg, rtx split_reg) { rtx real, temp; @@ -21024,6 +21586,11 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val, } } + /* If a store insn has been split into multiple insns, the + true source register is given by split_reg. 
*/ + if (split_reg != NULL_RTX) + real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg); + RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_FRAME_RELATED_EXPR, real); @@ -21131,7 +21698,7 @@ emit_frame_save (rtx frame_reg, enum machine_mode mode, reg = gen_rtx_REG (mode, regno); insn = emit_insn (gen_frame_store (reg, frame_reg, offset)); return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp, - NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX, NULL_RTX); } /* Emit an offset memory reference suitable for a frame store, while @@ -21247,7 +21814,7 @@ rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel) if ((sel & SAVRES_LR)) suffix = "_x"; } - else if (DEFAULT_ABI == ABI_AIX) + else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) { #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) /* No out-of-line save/restore routines for GPRs on AIX. */ @@ -21388,7 +21955,7 @@ rs6000_emit_stack_reset (rs6000_stack_t *info, static inline unsigned ptr_regno_for_savres (int sel) { - if (DEFAULT_ABI == ABI_AIX) + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12; return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11; } @@ -21473,6 +22040,43 @@ rs6000_emit_savres_rtx (rs6000_stack_t *info, return insn; } +/* Emit code to store CR fields that need to be saved into REG. */ + +static void +rs6000_emit_move_from_cr (rtx reg) +{ + /* Only the ELFv2 ABI allows storing only selected fields. */ + if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF) + { + int i, cr_reg[8], count = 0; + + /* Collect CR fields that must be saved. */ + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + cr_reg[count++] = i; + + /* If it's just a single one, use mfcrf. */ + if (count == 1) + { + rtvec p = rtvec_alloc (1); + rtvec r = rtvec_alloc (2); + RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]); + RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0])); + RTVEC_ELT (p, 0) + = gen_rtx_SET (VOIDmode, reg, + gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR)); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + return; + } + + /* ??? It might be better to handle count == 2 / 3 cases here + as well, using logical operations to combine the values. */ + } + + emit_insn (gen_movesi_from_cr (reg)); +} + /* Determine whether the gp REG is really used. */ static bool @@ -21538,6 +22142,17 @@ rs6000_emit_prologue (void) #define NOT_INUSE(R) do {} while (0) #endif + if (DEFAULT_ABI == ABI_ELFv2) + { + cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM); + + /* With -mminimal-toc we may generate an extra use of r2 below. */ + if (!TARGET_SINGLE_PIC_BASE + && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0) + cfun->machine->r2_setup_needed = true; + } + + if (flag_stack_usage_info) current_function_static_stack_size = info->total_size; @@ -21663,7 +22278,7 @@ rs6000_emit_prologue (void) insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - treg, GEN_INT (-info->total_size)); + treg, GEN_INT (-info->total_size), NULL_RTX); sp_off = frame_off = info->total_size; } @@ -21748,14 +22363,14 @@ rs6000_emit_prologue (void) insn = emit_move_insn (mem, reg); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX, NULL_RTX); END_USE (0); } } /* If we need to save CR, put it into r12 or r11. Choose r12 except when r12 will be needed by out-of-line gpr restore. 
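The GEN_INT (1 << (7 - cr_reg[0])) in rs6000_emit_move_from_cr above builds the FXM operand of mfcrf: bit 7 - i selects CR field i. A tiny standalone table of those masks (CR2..CR4 are the call-saved fields the ELFv2 code cares about):

#include <stdio.h>

static unsigned int
fxm_mask (int cr_field)
{
  return 1u << (7 - cr_field);
}

int
main (void)
{
  for (int i = 0; i < 8; i++)
    printf ("CR%d -> FXM 0x%02x%s\n", i, fxm_mask (i),
            (i >= 2 && i <= 4) ? "  (call-saved)" : "");
  return 0;
}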
*/ - cr_save_regno = (DEFAULT_ABI == ABI_AIX + cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && !(strategy & (SAVE_INLINE_GPRS | SAVE_NOINLINE_GPRS_SAVES_LR)) ? 11 : 12); @@ -21764,21 +22379,9 @@ rs6000_emit_prologue (void) && REGNO (frame_reg_rtx) != cr_save_regno && !(using_static_chain_p && cr_save_regno == 11)) { - rtx set; - cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno); START_USE (cr_save_regno); - insn = emit_insn (gen_movesi_from_cr (cr_save_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; - /* Now, there's no way that dwarf2out_frame_debug_expr is going - to understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)'. - But that's OK. All we have to do is specify that _one_ condition - code register is saved in this stack slot. The thrower's epilogue - will then restore all the call-saved registers. - We use CR2_REGNO (70) to be compatible with gcc-2.95 on Linux. */ - set = gen_rtx_SET (VOIDmode, cr_save_rtx, - gen_rtx_REG (SImode, CR2_REGNO)); - add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); + rs6000_emit_move_from_cr (cr_save_rtx); } /* Do any required saving of fpr's. If only one or two to save, do @@ -21816,7 +22419,7 @@ rs6000_emit_prologue (void) info->lr_save_offset, DFmode, sel); rs6000_frame_related (insn, ptr_reg, sp_off, - NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX, NULL_RTX); if (lr) END_USE (0); } @@ -21895,7 +22498,7 @@ rs6000_emit_prologue (void) SAVRES_SAVE | SAVRES_GPR); rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off, - NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX, NULL_RTX); } /* Move the static chain pointer back. */ @@ -21945,7 +22548,7 @@ rs6000_emit_prologue (void) info->lr_save_offset + ptr_off, reg_mode, sel); rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off, - NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX, NULL_RTX); if (lr) END_USE (0); } @@ -21961,7 +22564,7 @@ rs6000_emit_prologue (void) info->gp_save_offset + frame_off + reg_size * i); insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX, NULL_RTX); } else if (!WORLD_SAVE_P (info)) { @@ -22030,7 +22633,8 @@ rs6000_emit_prologue (void) be updated if we arrived at this function via a plt call or toc adjusting stub. */ emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg)); - toc_restore_insn = TARGET_32BIT ? 0x80410014 : 0xE8410028; + toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000) + + RS6000_TOC_SAVE_SLOT); hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode); emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi)); compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO); @@ -22049,7 +22653,7 @@ rs6000_emit_prologue (void) LABEL_NUSES (toc_save_done) += 1; save_insn = emit_frame_save (frame_reg_rtx, reg_mode, - TOC_REGNUM, frame_off + 5 * reg_size, + TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT, sp_off - frame_off); emit_label (toc_save_done); @@ -22089,26 +22693,121 @@ rs6000_emit_prologue (void) rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, GEN_INT (info->cr_save_offset + frame_off)); rtx mem = gen_frame_mem (SImode, addr); - /* See the large comment above about why CR2_REGNO is used. */ - rtx magic_eh_cr_reg = gen_rtx_REG (SImode, CR2_REGNO); /* If we didn't copy cr before, do so now using r0. 
*/ if (cr_save_rtx == NULL_RTX) { - rtx set; - START_USE (0); cr_save_rtx = gen_rtx_REG (SImode, 0); - insn = emit_insn (gen_movesi_from_cr (cr_save_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; - set = gen_rtx_SET (VOIDmode, cr_save_rtx, magic_eh_cr_reg); + rs6000_emit_move_from_cr (cr_save_rtx); + } + + /* Saving CR requires a two-instruction sequence: one instruction + to move the CR to a general-purpose register, and a second + instruction that stores the GPR to memory. + + We do not emit any DWARF CFI records for the first of these, + because we cannot properly represent the fact that CR is saved in + a register. One reason is that we cannot express that multiple + CR fields are saved; another reason is that on 64-bit, the size + of the CR register in DWARF (4 bytes) differs from the size of + a general-purpose register. + + This means if any intervening instruction were to clobber one of + the call-saved CR fields, we'd have incorrect CFI. To prevent + this from happening, we mark the store to memory as a use of + those CR fields, which prevents any such instruction from being + scheduled in between the two instructions. */ + rtx crsave_v[9]; + int n_crsave = 0; + int i; + + crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx); + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + crsave_v[n_crsave++] + = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (n_crsave, crsave_v))); + END_USE (REGNO (cr_save_rtx)); + + /* Now, there's no way that dwarf2out_frame_debug_expr is going to + understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)', + so we need to construct a frame expression manually. */ + RTX_FRAME_RELATED_P (insn) = 1; + + /* Update address to be stack-pointer relative, like + rs6000_frame_related would do. */ + addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM), + GEN_INT (info->cr_save_offset + sp_off)); + mem = gen_frame_mem (SImode, addr); + + if (DEFAULT_ABI == ABI_ELFv2) + { + /* In the ELFv2 ABI we generate separate CFI records for each + CR field that was actually saved. They all point to the + same 32-bit stack slot. */ + rtx crframe[8]; + int n_crframe = 0; + + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + crframe[n_crframe] + = gen_rtx_SET (VOIDmode, mem, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1; + n_crframe++; + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (n_crframe, crframe))); + } + else + { + /* In other ABIs, by convention, we use a single CR regnum to + represent the fact that all call-saved CR fields are saved. + We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */ + rtx set = gen_rtx_SET (VOIDmode, mem, + gen_rtx_REG (SImode, CR2_REGNO)); add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); } - insn = emit_move_insn (mem, cr_save_rtx); - END_USE (REGNO (cr_save_rtx)); + } - rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - NULL_RTX, NULL_RTX); + /* In the ELFv2 ABI we need to save all call-saved CR fields into + *separate* slots if the routine calls __builtin_eh_return, so + that they can be independently restored by the unwinder. */ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + int i, cr_off = info->ehcr_offset; + rtx crsave; + + /* ??? We might get better performance by using multiple mfocrf + instructions. 
*/ + crsave = gen_rtx_REG (SImode, 0); + emit_insn (gen_movesi_from_cr (crsave)); + + for (i = 0; i < 8; i++) + if (!call_used_regs[CR0_REGNO + i]) + { + rtvec p = rtvec_alloc (2); + RTVEC_ELT (p, 0) + = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off); + RTVEC_ELT (p, 1) + = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i), + sp_reg_rtx, cr_off + sp_off)); + + cr_off += reg_size; + } } /* Update stack and set back pointer unless this is V.4, @@ -22188,7 +22887,7 @@ rs6000_emit_prologue (void) info->altivec_save_offset + ptr_off, 0, V4SImode, SAVRES_SAVE | SAVRES_VR); rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off, - NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX, NULL_RTX); if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) { /* The oddity mentioned above clobbered our frame reg. */ @@ -22204,7 +22903,7 @@ rs6000_emit_prologue (void) for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) { - rtx areg, savereg, mem; + rtx areg, savereg, mem, split_reg; int offset; offset = (info->altivec_save_offset + frame_off @@ -22222,8 +22921,18 @@ rs6000_emit_prologue (void) insn = emit_move_insn (mem, savereg); + /* When we split a VSX store into two insns, we need to make + sure the DWARF info knows which register we are storing. + Pass it in to be used on the appropriate note. */ + if (!BYTES_BIG_ENDIAN + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT) + split_reg = savereg; + else + split_reg = NULL_RTX; + rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - areg, GEN_INT (offset)); + areg, GEN_INT (offset), split_reg); } } @@ -22247,7 +22956,8 @@ rs6000_emit_prologue (void) be using r12 as frame_reg_rtx and r11 as the static chain pointer for nested functions. */ save_regno = 12; - if (DEFAULT_ABI == ABI_AIX && !using_static_chain_p) + if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && !using_static_chain_p) save_regno = 11; else if (REGNO (frame_reg_rtx) == 12) { @@ -22286,10 +22996,10 @@ rs6000_emit_prologue (void) can use register 0. This allows us to use a plain 'blr' to return from the procedure more often. */ int save_LR_around_toc_setup = (TARGET_ELF - && DEFAULT_ABI != ABI_AIX + && DEFAULT_ABI == ABI_V4 && flag_pic && ! info->lr_save_p - && EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0); + && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0); if (save_LR_around_toc_setup) { rtx lr = gen_rtx_REG (Pmode, LR_REGNO); @@ -22348,7 +23058,7 @@ rs6000_emit_prologue (void) if (rs6000_save_toc_in_prologue_p ()) { rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM); - emit_insn (gen_frame_store (reg, sp_reg_rtx, 5 * reg_size)); + emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT)); } } @@ -22389,6 +23099,49 @@ rs6000_output_function_prologue (FILE *file, } } + /* ELFv2 ABI r2 setup code and local entry point. This must follow + immediately after the global entry point label. 
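Concretely, the fprintf calls below give a TOC-using function two entry points: callers with a different TOC enter at the global label and recompute r2 from the function address in r12, while same-TOC callers branch past those two instructions to the point named by .localentry. A standalone sketch that prints the same stanza for a hypothetical function foo:

#include <stdio.h>

int
main (void)
{
  const char *name = "foo";

  printf ("%s:\n", name);
  printf ("0:\taddis 2,12,.TOC.-0b@ha\n");          /* global entry: build r2 */
  printf ("\taddi 2,2,.TOC.-0b@l\n");
  printf ("\t.localentry\t%s,.-%s\n", name, name);  /* local entry follows */
  return 0;
}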
*/ + if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed) + { + const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + + fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n"); + fprintf (file, "\taddi 2,2,.TOC.-0b@l\n"); + + fputs ("\t.localentry\t", file); + assemble_name (file, name); + fputs (",.-", file); + assemble_name (file, name); + fputs ("\n", file); + } + + /* Output -mprofile-kernel code. This needs to be done here instead of + in output_function_profiler since it must go after the ELFv2 ABI + local entry point. */ + if (TARGET_PROFILE_KERNEL) + { + gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); + gcc_assert (!TARGET_32BIT); + + asm_fprintf (file, "\tmflr %s\n", reg_names[0]); + asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]); + + /* In the ELFv2 ABI we have no compiler stack word. It must be + the responsibility of _mcount to preserve the static chain + register if required. */ + if (DEFAULT_ABI != ABI_ELFv2 + && cfun->static_chain_decl != NULL) + { + asm_fprintf (file, "\tstd %s,24(%s)\n", + reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); + fprintf (file, "\tbl %s\n", RS6000_MCOUNT); + asm_fprintf (file, "\tld %s,24(%s)\n", + reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); + } + else + fprintf (file, "\tbl %s\n", RS6000_MCOUNT); + } + rs6000_pic_labelno++; } @@ -22441,6 +23194,7 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func) if (using_mfcr_multiple && count > 1) { + rtx insn; rtvec p; int ndx; @@ -22458,16 +23212,43 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func) gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR)); ndx++; } - emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); gcc_assert (ndx == count); + + /* For the ELFv2 ABI we generate a CFA_RESTORE for each + CR field separately. */ + if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) + { + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } } else for (i = 0; i < 8; i++) if (save_reg_p (CR0_REGNO + i)) - emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i), - reg)); + { + rtx insn = emit_insn (gen_movsi_to_cr_one + (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); + + /* For the ELFv2 ABI we generate a CFA_RESTORE for each + CR field separately, attached to the insn that in fact + restores this particular CR field. */ + if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) + { + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); - if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + /* For other ABIs, we just generate a single CFA_RESTORE for CR2. 
*/ + if (!exit_func && DEFAULT_ABI != ABI_ELFv2 + && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) { rtx insn = get_last_insn (); rtx cr = gen_rtx_REG (SImode, CR2_REGNO); @@ -22508,10 +23289,22 @@ restore_saved_lr (int regno, bool exit_func) static rtx add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores) { - if (info->cr_save_p) + if (DEFAULT_ABI == ABI_ELFv2) + { + int i; + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr, + cfa_restores); + } + } + else if (info->cr_save_p) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, CR2_REGNO), cfa_restores); + if (info->lr_save_p) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNO), @@ -23009,6 +23802,35 @@ rs6000_emit_epilogue (int sibcall) || (!restoring_GPRs_inline && info->first_fp_reg_save == 64)); + /* In the ELFv2 ABI we need to restore all call-saved CR fields from + *separate* slots if the routine calls __builtin_eh_return, so + that they can be independently restored by the unwinder. */ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + int i, cr_off = info->ehcr_offset; + + for (i = 0; i < 8; i++) + if (!call_used_regs[CR0_REGNO + i]) + { + rtx reg = gen_rtx_REG (SImode, 0); + emit_insn (gen_frame_load (reg, frame_reg_rtx, + cr_off + frame_off)); + + insn = emit_insn (gen_movsi_to_cr_one + (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); + + if (!exit_func && flag_shrink_wrap) + { + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } + + cr_off += reg_size; + } + } + /* Get the old lr if we saved it. If we are restoring registers out-of-line, then the out-of-line routines can do this for us. */ if (restore_lr && restoring_GPRs_inline) @@ -23052,7 +23874,7 @@ rs6000_emit_epilogue (int sibcall) { rtx reg = gen_rtx_REG (reg_mode, 2); emit_insn (gen_frame_load (reg, frame_reg_rtx, - frame_off + 5 * reg_size)); + frame_off + RS6000_TOC_SAVE_SLOT)); } for (i = 0; ; ++i) @@ -23338,6 +24160,7 @@ rs6000_emit_epilogue (int sibcall) if (! restoring_FPRs_inline) { int i; + int reg; rtx sym; if (flag_shrink_wrap) @@ -23346,10 +24169,9 @@ rs6000_emit_epilogue (int sibcall) sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0)); RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym); - RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, - gen_rtx_REG (Pmode, - DEFAULT_ABI == ABI_AIX - ? 1 : 11)); + reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11; + RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg)); + for (i = 0; i < 64 - info->first_fp_reg_save; i++) { rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i); @@ -23427,7 +24249,8 @@ rs6000_output_function_epilogue (FILE *file, System V.4 Powerpc's (and the embedded ABI derived from it) use a different traceback table. */ - if (DEFAULT_ABI == ABI_AIX && ! flag_inhibit_size_directive + if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && ! flag_inhibit_size_directive && rs6000_traceback != traceback_none && !cfun->is_thunk) { const char *fname = NULL; @@ -23755,6 +24578,12 @@ rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, SIBLING_CALL_P (insn) = 1; emit_barrier (); + /* Ensure we have a global entry point for the thunk. ??? We could + avoid that if the target routine doesn't need a global entry point, + but we do not know whether this is the case at this point. 
*/ + if (DEFAULT_ABI == ABI_ELFv2) + cfun->machine->r2_setup_needed = true; + /* Run just enough of rest_of_compilation to get the insns emitted. There's not really enough bulk here to make other passes such as instruction scheduling worth while. Note that use_thunk calls @@ -24430,7 +25259,7 @@ output_profile_hook (int labelno ATTRIBUTE_UNUSED) if (TARGET_PROFILE_KERNEL) return; - if (DEFAULT_ABI == ABI_AIX) + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) { #ifndef NO_PROFILE_COUNTERS # define NO_PROFILE_COUNTERS 0 @@ -24574,29 +25403,9 @@ output_function_profiler (FILE *file, int labelno) break; case ABI_AIX: + case ABI_ELFv2: case ABI_DARWIN: - if (!TARGET_PROFILE_KERNEL) - { - /* Don't do anything, done in output_profile_hook (). */ - } - else - { - gcc_assert (!TARGET_32BIT); - - asm_fprintf (file, "\tmflr %s\n", reg_names[0]); - asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]); - - if (cfun->static_chain_decl != NULL) - { - asm_fprintf (file, "\tstd %s,24(%s)\n", - reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); - fprintf (file, "\tbl %s\n", RS6000_MCOUNT); - asm_fprintf (file, "\tld %s,24(%s)\n", - reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); - } - else - fprintf (file, "\tbl %s\n", RS6000_MCOUNT); - } + /* Don't do anything, done in output_profile_hook (). */ break; } } @@ -26532,6 +27341,11 @@ rs6000_trampoline_size (void) ret = (TARGET_32BIT) ? 12 : 24; break; + case ABI_ELFv2: + gcc_assert (!TARGET_32BIT); + ret = 32; + break; + case ABI_DARWIN: case ABI_V4: ret = (TARGET_32BIT) ? 40 : 48; @@ -26587,6 +27401,7 @@ rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) break; /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */ + case ABI_ELFv2: case ABI_DARWIN: case ABI_V4: emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"), @@ -26881,7 +27696,7 @@ rs6000_ms_bitfield_layout_p (const_tree record_type) static void rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) { - if (DEFAULT_ABI == ABI_AIX + if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_MINIMAL_TOC && !TARGET_RELOCATABLE) { @@ -26902,7 +27717,8 @@ rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) else fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); } - else if (DEFAULT_ABI == ABI_AIX && !TARGET_RELOCATABLE) + else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && !TARGET_RELOCATABLE) fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); else { @@ -27452,7 +28268,7 @@ rs6000_elf_reloc_rw_mask (void) { if (flag_pic) return 3; - else if (DEFAULT_ABI == ABI_AIX) + else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) return 2; else return 0; @@ -27528,7 +28344,7 @@ rs6000_elf_asm_out_destructor (rtx symbol, int priority) void rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl) { - if (TARGET_64BIT) + if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2) { fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file); ASM_OUTPUT_LABEL (file, name); @@ -27594,8 +28410,7 @@ rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl) fprintf (file, "%s:\n", desc_name); fprintf (file, "\t.long %s\n", orig_name); fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file); - if (DEFAULT_ABI == ABI_AIX) - fputs ("\t.long 0\n", file); + fputs ("\t.long 0\n", file); fprintf (file, "\t.previous\n"); } ASM_OUTPUT_LABEL (file, name); @@ -27624,7 +28439,7 @@ rs6000_elf_file_end (void) } #endif #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD) - if (TARGET_32BIT) + 
if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2) file_end_indicate_exec_stack (); #endif } @@ -29037,6 +29852,8 @@ altivec_expand_vec_perm_const (rtx operands[4]) break; if (i == 16) { + if (!BYTES_BIG_ENDIAN) + elt = 15 - elt; emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt))); return true; } @@ -29299,7 +30116,7 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) unsigned i, high, nelt = GET_MODE_NUNITS (vmode); rtx perm[16]; - high = (highp == BYTES_BIG_ENDIAN ? 0 : nelt / 2); + high = (highp ? 0 : nelt / 2); for (i = 0; i < nelt / 2; i++) { perm[i * 2] = GEN_INT (i + high); @@ -29354,6 +30171,8 @@ rs6000_function_value (const_tree valtype, { enum machine_mode mode; unsigned int regno; + enum machine_mode elt_mode; + int n_elts; /* Special handling for structs in darwin64. */ if (TARGET_MACHO @@ -29373,6 +30192,36 @@ rs6000_function_value (const_tree valtype, /* Otherwise fall through to standard ABI rules. */ } + /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */ + if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype, + &elt_mode, &n_elts)) + { + int first_reg, n_regs, i; + rtx par; + + if (SCALAR_FLOAT_MODE_P (elt_mode)) + { + /* _Decimal128 must use even/odd register pairs. */ + first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; + n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3; + } + else + { + first_reg = ALTIVEC_ARG_RETURN; + n_regs = 1; + } + + par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts)); + for (i = 0; i < n_elts; i++) + { + rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs); + rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); + XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + + return par; + } + if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode) { /* Long long return value need be split in -mpowerpc64, 32bit ABI. */ @@ -30578,118 +31427,149 @@ rs6000_legitimate_constant_p (enum machine_mode mode, rtx x) } -/* A function pointer under AIX is a pointer to a data area whose first word - contains the actual address of the function, whose second word contains a - pointer to its TOC, and whose third word contains a value to place in the - static chain register (r11). Note that if we load the static chain, our - "trampoline" need not have any executable code. */ + +/* Expand code to perform a call under the AIX or ELFv2 ABI. */ void -rs6000_call_indirect_aix (rtx value, rtx func_desc, rtx flag) +rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie) { + rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM); + rtx toc_load = NULL_RTX; + rtx toc_restore = NULL_RTX; rtx func_addr; - rtx toc_reg; - rtx sc_reg; - rtx stack_ptr; - rtx stack_toc_offset; - rtx stack_toc_mem; - rtx func_toc_offset; - rtx func_toc_mem; - rtx func_sc_offset; - rtx func_sc_mem; + rtx abi_reg = NULL_RTX; + rtx call[4]; + int n_call; rtx insn; - rtx (*call_func) (rtx, rtx, rtx, rtx); - rtx (*call_value_func) (rtx, rtx, rtx, rtx, rtx); - - stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); - toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM); - /* Load up address of the actual function. */ - func_desc = force_reg (Pmode, func_desc); - func_addr = gen_reg_rtx (Pmode); - emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc)); - - if (TARGET_32BIT) - { + /* Handle longcall attributes. */ + if (INTVAL (cookie) & CALL_LONG) + func_desc = rs6000_longcall_ref (func_desc); + + /* Handle indirect calls. 
*/ + if (GET_CODE (func_desc) != SYMBOL_REF + || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc))) + { + /* Save the TOC into its reserved slot before the call, + and prepare to restore it after the call. */ + rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT); + rtx stack_toc_mem = gen_frame_mem (Pmode, + gen_rtx_PLUS (Pmode, stack_ptr, + stack_toc_offset)); + toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem); + + /* Can we optimize saving the TOC in the prologue or + do we need to do it at every call? */ + if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca) + cfun->machine->save_toc_in_prologue = true; + else + { + MEM_VOLATILE_P (stack_toc_mem) = 1; + emit_move_insn (stack_toc_mem, toc_reg); + } - stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_32BIT); - func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_32BIT); - func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_32BIT); - if (TARGET_POINTERS_TO_NESTED_FUNCTIONS) + if (DEFAULT_ABI == ABI_ELFv2) { - call_func = gen_call_indirect_aix32bit; - call_value_func = gen_call_value_indirect_aix32bit; + /* A function pointer in the ELFv2 ABI is just a plain address, but + the ABI requires it to be loaded into r12 before the call. */ + func_addr = gen_rtx_REG (Pmode, 12); + emit_move_insn (func_addr, func_desc); + abi_reg = func_addr; } else { - call_func = gen_call_indirect_aix32bit_nor11; - call_value_func = gen_call_value_indirect_aix32bit_nor11; + /* A function pointer under AIX is a pointer to a data area whose + first word contains the actual address of the function, whose + second word contains a pointer to its TOC, and whose third word + contains a value to place in the static chain register (r11). + Note that if we load the static chain, our "trampoline" need + not have any executable code. */ + + /* Load up address of the actual function. */ + func_desc = force_reg (Pmode, func_desc); + func_addr = gen_reg_rtx (Pmode); + emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc)); + + /* Prepare to load the TOC of the called function. Note that the + TOC load must happen immediately before the actual call so + that unwinding the TOC registers works correctly. See the + comment in frob_update_context. */ + rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode)); + rtx func_toc_mem = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, func_desc, + func_toc_offset)); + toc_load = gen_rtx_USE (VOIDmode, func_toc_mem); + + /* If we have a static chain, load it up. */ + if (TARGET_POINTERS_TO_NESTED_FUNCTIONS) + { + rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); + rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode)); + rtx func_sc_mem = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, func_desc, + func_sc_offset)); + emit_move_insn (sc_reg, func_sc_mem); + abi_reg = sc_reg; + } } } else { - stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_64BIT); - func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_64BIT); - func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_64BIT); - if (TARGET_POINTERS_TO_NESTED_FUNCTIONS) - { - call_func = gen_call_indirect_aix64bit; - call_value_func = gen_call_value_indirect_aix64bit; - } - else - { - call_func = gen_call_indirect_aix64bit_nor11; - call_value_func = gen_call_value_indirect_aix64bit_nor11; - } + /* Direct calls use the TOC: for local calls, the callee will + assume the TOC register is set; for non-local calls, the + PLT stub needs the TOC register. */ + abi_reg = toc_reg; + func_addr = func_desc; } - /* Reserved spot to store the TOC. 
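The two branches above reflect the data structures involved: an AIX function pointer addresses a three-word descriptor, while an ELFv2 function pointer is the code address itself and merely has to sit in r12 at the branch. A standalone sketch of the shapes, with the TOC save slot figures taken from the RS6000_TOC_SAVE_SLOT definition in the rs6000.h hunk below (byte 24 under ELFv2, byte 40 under 64-bit AIX):

#include <stdio.h>

/* AIX function descriptor; ELFv2 has no such object.  */
struct aix_func_desc {
  void *entry;         /* actual code address */
  void *toc;           /* callee's TOC, loaded just before the branch */
  void *static_chain;  /* optionally loaded into r11 */
};

int
main (void)
{
  printf ("AIX:   load entry/TOC(/chain) from a %zu-byte descriptor, "
          "caller's r2 saved at r1+40\n", sizeof (struct aix_func_desc));
  printf ("ELFv2: move the pointer to r12 and branch, "
          "caller's r2 saved at r1+24\n");
  return 0;
}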
*/ - stack_toc_mem = gen_frame_mem (Pmode, - gen_rtx_PLUS (Pmode, - stack_ptr, - stack_toc_offset)); + /* Create the call. */ + call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag); + if (value != NULL_RTX) + call[0] = gen_rtx_SET (VOIDmode, value, call[0]); + n_call = 1; - gcc_assert (cfun); - gcc_assert (cfun->machine); + if (toc_load) + call[n_call++] = toc_load; + if (toc_restore) + call[n_call++] = toc_restore; - /* Can we optimize saving the TOC in the prologue or do we need to do it at - every call? */ - if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca) - cfun->machine->save_toc_in_prologue = true; + call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); - else - { - MEM_VOLATILE_P (stack_toc_mem) = 1; - emit_move_insn (stack_toc_mem, toc_reg); - } + insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call)); + insn = emit_call_insn (insn); - /* Calculate the address to load the TOC of the called function. We don't - actually load this until the split after reload. */ - func_toc_mem = gen_rtx_MEM (Pmode, - gen_rtx_PLUS (Pmode, - func_desc, - func_toc_offset)); + /* Mention all registers defined by the ABI to hold information + as uses in CALL_INSN_FUNCTION_USAGE. */ + if (abi_reg) + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); +} - /* If we have a static chain, load it up. */ - if (TARGET_POINTERS_TO_NESTED_FUNCTIONS) - { - func_sc_mem = gen_rtx_MEM (Pmode, - gen_rtx_PLUS (Pmode, - func_desc, - func_sc_offset)); +/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */ - sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); - emit_move_insn (sc_reg, func_sc_mem); - } +void +rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie) +{ + rtx call[2]; + rtx insn; + + gcc_assert (INTVAL (cookie) == 0); /* Create the call. */ - if (value) - insn = call_value_func (value, func_addr, flag, func_toc_mem, - stack_toc_mem); - else - insn = call_func (func_addr, flag, func_toc_mem, stack_toc_mem); + call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag); + if (value != NULL_RTX) + call[0] = gen_rtx_SET (VOIDmode, value, call[0]); + + call[1] = simple_return_rtx; + + insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call)); + insn = emit_call_insn (insn); - emit_call_insn (insn); + /* Note use of the TOC register. */ + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM)); + /* We need to also mark a use of the link register since the function we + sibling-call to will use it to return to our caller. */ + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO)); } /* Return whether we need to always update the saved TOC pointer when we update diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index f13951eb235..eb59235ec61 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1527,21 +1527,14 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; #define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \ || (flag_sanitize & SANITIZE_ADDRESS) != 0) -/* Size of the outgoing register save area */ -#define RS6000_REG_SAVE ((DEFAULT_ABI == ABI_AIX \ - || DEFAULT_ABI == ABI_DARWIN) \ - ? (TARGET_64BIT ? 64 : 32) \ - : 0) - /* Size of the fixed area on the stack */ #define RS6000_SAVE_AREA \ - (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_DARWIN) ? 24 : 8) \ + ((DEFAULT_ABI == ABI_V4 ? 8 : DEFAULT_ABI == ABI_ELFv2 ? 16 : 24) \ << (TARGET_64BIT ? 
1 : 0)) -/* MEM representing address to save the TOC register */ -#define RS6000_SAVE_TOC gen_rtx_MEM (Pmode, \ - plus_constant (Pmode, stack_pointer_rtx, \ - (TARGET_32BIT ? 20 : 40))) +/* Stack offset for toc save slot. */ +#define RS6000_TOC_SAVE_SLOT \ + ((DEFAULT_ABI == ABI_ELFv2 ? 12 : 20) << (TARGET_64BIT ? 1 : 0)) /* Align an address */ #define RS6000_ALIGN(n,a) (((n) + (a) - 1) & ~((a) - 1)) @@ -1591,7 +1584,7 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; /* Define this if stack space is still allocated for a parameter passed in a register. The value is the number of bytes allocated to this area. */ -#define REG_PARM_STACK_SPACE(FNDECL) RS6000_REG_SAVE +#define REG_PARM_STACK_SPACE(FNDECL) rs6000_reg_parm_stack_space((FNDECL)) /* Define this if the above stack space is to be considered part of the space allocated by the caller. */ @@ -1635,9 +1628,8 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; #define FP_ARG_MIN_REG 33 #define FP_ARG_AIX_MAX_REG 45 #define FP_ARG_V4_MAX_REG 40 -#define FP_ARG_MAX_REG ((DEFAULT_ABI == ABI_AIX \ - || DEFAULT_ABI == ABI_DARWIN) \ - ? FP_ARG_AIX_MAX_REG : FP_ARG_V4_MAX_REG) +#define FP_ARG_MAX_REG (DEFAULT_ABI == ABI_V4 \ + ? FP_ARG_V4_MAX_REG : FP_ARG_AIX_MAX_REG) #define FP_ARG_NUM_REG (FP_ARG_MAX_REG - FP_ARG_MIN_REG + 1) /* Minimum and maximum AltiVec registers used to hold arguments. */ @@ -1645,10 +1637,17 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; #define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11) #define ALTIVEC_ARG_NUM_REG (ALTIVEC_ARG_MAX_REG - ALTIVEC_ARG_MIN_REG + 1) +/* Maximum number of registers per ELFv2 homogeneous aggregate argument. */ +#define AGGR_ARG_NUM_REG 8 + /* Return registers */ #define GP_ARG_RETURN GP_ARG_MIN_REG #define FP_ARG_RETURN FP_ARG_MIN_REG #define ALTIVEC_ARG_RETURN (FIRST_ALTIVEC_REGNO + 2) +#define FP_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? FP_ARG_RETURN \ + : (FP_ARG_RETURN + AGGR_ARG_NUM_REG - 1)) +#define ALTIVEC_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? ALTIVEC_ARG_RETURN \ + : (ALTIVEC_ARG_RETURN + AGGR_ARG_NUM_REG - 1)) /* Flags for the call/call_value rtl operations set up by function_arg */ #define CALL_NORMAL 0x00000000 /* no special processing */ @@ -1668,8 +1667,10 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; On RS/6000, this is r3, fp1, and v2 (for AltiVec). */ #define FUNCTION_VALUE_REGNO_P(N) \ ((N) == GP_ARG_RETURN \ - || ((N) == FP_ARG_RETURN && TARGET_HARD_FLOAT && TARGET_FPRS) \ - || ((N) == ALTIVEC_ARG_RETURN && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)) + || ((N) >= FP_ARG_RETURN && (N) <= FP_ARG_MAX_RETURN \ + && TARGET_HARD_FLOAT && TARGET_FPRS) \ + || ((N) >= ALTIVEC_ARG_RETURN && (N) <= ALTIVEC_ARG_MAX_RETURN \ + && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)) /* 1 if N is a possible register number for function argument passing. On RS/6000, these are r3-r10 and fp1-fp13. @@ -1793,11 +1794,8 @@ typedef struct rs6000_args /* Number of bytes into the frame return addresses can be found. See rs6000_stack_info in rs6000.c for more information on how the different abi's store the return address. */ -#define RETURN_ADDRESS_OFFSET \ - ((DEFAULT_ABI == ABI_AIX \ - || DEFAULT_ABI == ABI_DARWIN) ? (TARGET_32BIT ? 8 : 16) : \ - (DEFAULT_ABI == ABI_V4) ? 4 : \ - (internal_error ("RETURN_ADDRESS_OFFSET not supported"), 0)) +#define RETURN_ADDRESS_OFFSET \ + ((DEFAULT_ABI == ABI_V4 ? 4 : 8) << (TARGET_64BIT ? 1 : 0)) /* The current return address is in link register (65). 
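   (Worked values for the rewritten RETURN_ADDRESS_OFFSET above, added
   for clarity: the new expression yields 4 for 32-bit ABI_V4 and 8 for
   the 32-bit AIX-style ABIs, doubled to 16 by the shift when
   TARGET_64BIT, matching the values of the conditional it replaces.)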
The return address of anything farther back is accessed normally at an offset of 8 from the diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 3f13c4603f7..4de7c3a2f6e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -56,18 +56,6 @@ (TFHAR_REGNO 114) (TFIAR_REGNO 115) (TEXASR_REGNO 116) - - ; ABI defined stack offsets for storing the TOC pointer with AIX calls. - (TOC_SAVE_OFFSET_32BIT 20) - (TOC_SAVE_OFFSET_64BIT 40) - - ; Function TOC offset in the AIX function descriptor. - (AIX_FUNC_DESC_TOC_32BIT 4) - (AIX_FUNC_DESC_TOC_64BIT 8) - - ; Static chain offset in the AIX function descriptor. - (AIX_FUNC_DESC_SC_32BIT 8) - (AIX_FUNC_DESC_SC_64BIT 16) ]) ;; @@ -5226,7 +5214,7 @@ "TARGET_<MODE>_FPR" "@ fcmpu %0,%1,%2 - xscmpudp %x0,%x1,%x2" + xscmpudp %0,%x1,%x2" [(set_attr "type" "fpcompare")]) ;; Floating point conversions @@ -5237,8 +5225,8 @@ "") (define_insn_and_split "*extendsfdf2_fpr" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,wy,?wy,wv") - (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wz,Z")))] + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wv") + (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z")))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" "@ # @@ -5360,7 +5348,7 @@ "TARGET_<MODE>_FPR && TARGET_CMPB" "@ fcpsgn %0,%2,%1 - xscpsgn<VSs> %x0,%x2,%x1" + xscpsgn<Fvsx> %x0,%x2,%x1" [(set_attr "type" "fp")]) ;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a @@ -10081,8 +10069,8 @@ ;; Use of fprs is disparaged slightly otherwise reload prefers to reload ;; a gpr into a fpr instead of reloading an invalid 'Y' address (define_insn "*movdi_internal32" - [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r,?wa") - (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF,O"))] + [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r") + (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF"))] "! 
TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -10093,8 +10081,7 @@ stfd%U0%X0 %1,%0 lfd%U1%X1 %0,%1 fmr %0,%1 - # - xxlxor %x0,%x0,%x0" + #" [(set_attr_alternative "type" [(const_string "store") (const_string "load") @@ -10114,8 +10101,7 @@ (const_string "fpload_u") (const_string "fpload"))) (const_string "fp") - (const_string "*") - (const_string "vecsimple")])]) + (const_string "*")])]) (define_split [(set (match_operand:DI 0 "gpc_reg_operand" "") @@ -10146,8 +10132,8 @@ { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_insn "*movdi_internal64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wv,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") - (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wv,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*wg,r,?*wm") + (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*wg,r,*wm,r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -10161,13 +10147,9 @@ stfd%U0%X0 %1,%0 lfd%U1%X1 %0,%1 fmr %0,%1 - stxsd%U0x %x1,%y0 - lxsd%U1x %x0,%y1 - xxlor %x0,%x1,%x1 mf%1 %0 mt%0 %1 nop - xxlxor %x0,%x0,%x0 mftgpr %0,%1 mffgpr %0,%1 mfvsrd %0,%x1 @@ -10206,24 +10188,14 @@ (const_string "fpload_u") (const_string "fpload"))) (const_string "fp") - (if_then_else - (match_test "update_indexed_address_mem (operands[0], VOIDmode)") - (const_string "fpstore_ux") - (const_string "fpstore")) - (if_then_else - (match_test "update_indexed_address_mem (operands[1], VOIDmode)") - (const_string "fpload_ux") - (const_string "fpload")) - (const_string "vecsimple") (const_string "mfjmpr") (const_string "mtjmpr") (const_string "*") - (const_string "vecsimple") (const_string "mftgpr") (const_string "mffgpr") (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")]) + (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4")]) ;; Generate all one-bits and clear left or right. ;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber. 
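(Note, added for orientation and not part of the patch: the hunks below
extend the TLS call patterns and TOC setup from ABI_AIX alone to
ABI_AIX || ABI_ELFv2, and tighten the V4-only patterns to test
DEFAULT_ABI == ABI_V4 explicitly.  A hedged C example of code that
instantiates the general-dynamic patterns when built with -fPIC for a
shared library; the names are illustrative:

    __thread int counter;
    int bump (void) { return ++counter; }

The access calls __tls_get_addr through the tls_gd insns, using the
@got@tlsgd relocations visible in the templates that follow.)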
@@ -11213,7 +11185,7 @@ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] UNSPEC_TLSGD) (clobber (reg:SI LR_REGNO))] - "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX" + "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" { if (TARGET_CMODEL != CMODEL_SMALL) return "addis %0,%1,%2@got@tlsgd@ha\;addi %0,%0,%2@got@tlsgd@l\;" @@ -11322,7 +11294,8 @@ (unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")] UNSPEC_TLSGD) (clobber (reg:SI LR_REGNO))] - "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS" + "HAVE_AS_TLS && TARGET_TLS_MARKERS + && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" "bl %z1(%3@tlsgd)\;nop" [(set_attr "type" "branch") (set_attr "length" "8")]) @@ -11354,7 +11327,7 @@ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")] UNSPEC_TLSLD) (clobber (reg:SI LR_REGNO))] - "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX" + "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" { if (TARGET_CMODEL != CMODEL_SMALL) return "addis %0,%1,%&@got@tlsld@ha\;addi %0,%0,%&@got@tlsld@l\;" @@ -11457,7 +11430,8 @@ (match_operand 2 "" "g"))) (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD) (clobber (reg:SI LR_REGNO))] - "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS" + "HAVE_AS_TLS && TARGET_TLS_MARKERS + && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" "bl %z1(%&@tlsld)\;nop" [(set_attr "type" "branch") (set_attr "length" "8")]) @@ -11828,7 +11802,7 @@ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "=r") (unspec:SI [(const_int 0)] UNSPEC_TOC)) (use (reg:SI 2))])] - "DEFAULT_ABI == ABI_AIX && TARGET_32BIT" + "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_32BIT" "* { char buf[30]; @@ -11843,7 +11817,7 @@ [(parallel [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (unspec:DI [(const_int 0)] UNSPEC_TOC)) (use (reg:DI 2))])] - "DEFAULT_ABI == ABI_AIX && TARGET_64BIT" + "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_64BIT" "* { char buf[30]; @@ -11873,7 +11847,7 @@ [(parallel [(set (reg:SI LR_REGNO) (match_operand:SI 0 "immediate_operand" "s")) (use (unspec [(match_dup 0)] UNSPEC_TOC))])] - "TARGET_ELF && DEFAULT_ABI != ABI_AIX + "TARGET_ELF && DEFAULT_ABI == ABI_V4 && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" "") @@ -11881,7 +11855,7 @@ [(set (reg:SI LR_REGNO) (match_operand:SI 0 "immediate_operand" "s")) (use (unspec [(match_dup 0)] UNSPEC_TOC))] - "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX + "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" "bcl 20,31,%0\\n%0:" [(set_attr "type" "branch") @@ -11891,7 +11865,7 @@ [(set (reg:SI LR_REGNO) (match_operand:SI 0 "immediate_operand" "s")) (use (unspec [(match_dup 0)] UNSPEC_TOC))] - "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX + "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" "* { @@ -11911,7 +11885,7 @@ (label_ref (match_operand 1 "" ""))] UNSPEC_TOCPTR)) (match_dup 1)])] - "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2" + "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" "") (define_insn "load_toc_v4_PIC_1b_normal" @@ -11920,7 +11894,7 @@ (label_ref (match_operand 1 "" ""))] UNSPEC_TOCPTR)) (match_dup 1)] - "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2" + "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" "bcl 20,31,$+8\;.long %0-$" [(set_attr "type" "branch") (set_attr "length" "8")]) @@ 
-11931,7 +11905,7 @@ (label_ref (match_operand 1 "" ""))] UNSPEC_TOCPTR)) (match_dup 1)] - "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2" + "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" "* { char name[32]; @@ -11949,7 +11923,7 @@ (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") (minus:SI (match_operand:SI 2 "immediate_operand" "s") (match_operand:SI 3 "immediate_operand" "s")))))] - "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2" + "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" "lwz %0,%2-%3(%1)" [(set_attr "type" "load")]) @@ -11959,7 +11933,7 @@ (high:SI (minus:SI (match_operand:SI 2 "symbol_ref_operand" "s") (match_operand:SI 3 "symbol_ref_operand" "s")))))] - "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic" + "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic" "addis %0,%1,%2-%3@ha") (define_insn "load_toc_v4_PIC_3c" @@ -11967,7 +11941,7 @@ (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") (minus:SI (match_operand:SI 2 "symbol_ref_operand" "s") (match_operand:SI 3 "symbol_ref_operand" "s"))))] - "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic" + "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic" "addi %0,%1,%2-%3@l") ;; If the TOC is shared over a translation unit, as happens with all @@ -12109,8 +12083,13 @@ operands[0] = XEXP (operands[0], 0); + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_call_aix (NULL_RTX, operands[0], operands[1], operands[2]); + DONE; + } + if (GET_CODE (operands[0]) != SYMBOL_REF - || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[0])) || (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[2]) & CALL_LONG) != 0)) { if (INTVAL (operands[2]) & CALL_LONG) @@ -12123,12 +12102,6 @@ operands[0] = force_reg (Pmode, operands[0]); break; - case ABI_AIX: - /* AIX function pointers are really pointers to a three word - area. */ - rs6000_call_indirect_aix (NULL_RTX, operands[0], operands[1]); - DONE; - default: gcc_unreachable (); } @@ -12154,8 +12127,13 @@ operands[1] = XEXP (operands[1], 0); + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_call_aix (operands[0], operands[1], operands[2], operands[3]); + DONE; + } + if (GET_CODE (operands[1]) != SYMBOL_REF - || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[1])) || (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[3]) & CALL_LONG) != 0)) { if (INTVAL (operands[3]) & CALL_LONG) @@ -12168,12 +12146,6 @@ operands[1] = force_reg (Pmode, operands[1]); break; - case ABI_AIX: - /* AIX function pointers are really pointers to a three word - area. */ - rs6000_call_indirect_aix (operands[0], operands[1], operands[2]); - DONE; - default: gcc_unreachable (); } @@ -12265,135 +12237,6 @@ [(set_attr "type" "branch") (set_attr "length" "4,8")]) -;; Call to indirect functions with the AIX abi using a 3 word descriptor. 
-;; Operand0 is the addresss of the function to call -;; Operand1 is the flag for System V.4 for unprototyped or FP registers -;; Operand2 is the location in the function descriptor to load r2 from -;; Operand3 is the stack location to hold the current TOC pointer - -(define_insn "call_indirect_aix<ptrsize>" - [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l")) - (match_operand 1 "" "g,g")) - (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>")) - (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>")) - (use (reg:P STATIC_CHAIN_REGNUM)) - (clobber (reg:P LR_REGNO))] - "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS" - "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3" - [(set_attr "type" "jmpreg") - (set_attr "length" "12")]) - -;; Like call_indirect_aix<ptrsize>, but no use of the static chain -;; Operand0 is the addresss of the function to call -;; Operand1 is the flag for System V.4 for unprototyped or FP registers -;; Operand2 is the location in the function descriptor to load r2 from -;; Operand3 is the stack location to hold the current TOC pointer - -(define_insn "call_indirect_aix<ptrsize>_nor11" - [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l")) - (match_operand 1 "" "g,g")) - (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>")) - (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>")) - (clobber (reg:P LR_REGNO))] - "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS" - "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3" - [(set_attr "type" "jmpreg") - (set_attr "length" "12")]) - -;; Operand0 is the return result of the function -;; Operand1 is the addresss of the function to call -;; Operand2 is the flag for System V.4 for unprototyped or FP registers -;; Operand3 is the location in the function descriptor to load r2 from -;; Operand4 is the stack location to hold the current TOC pointer - -(define_insn "call_value_indirect_aix<ptrsize>" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:P 1 "register_operand" "c,*l")) - (match_operand 2 "" "g,g"))) - (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>")) - (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>")) - (use (reg:P STATIC_CHAIN_REGNUM)) - (clobber (reg:P LR_REGNO))] - "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS" - "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4" - [(set_attr "type" "jmpreg") - (set_attr "length" "12")]) - -;; Like call_value_indirect_aix<ptrsize>, but no use of the static chain -;; Operand0 is the return result of the function -;; Operand1 is the addresss of the function to call -;; Operand2 is the flag for System V.4 for unprototyped or FP registers -;; Operand3 is the location in the function descriptor to load r2 from -;; Operand4 is the stack location to hold the current TOC pointer - -(define_insn "call_value_indirect_aix<ptrsize>_nor11" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:P 1 "register_operand" "c,*l")) - (match_operand 2 "" "g,g"))) - (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>")) - (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>")) - (clobber (reg:P LR_REGNO))] - "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS" - "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4" - [(set_attr "type" "jmpreg") - (set_attr "length" "12")]) - -;; Call to function which may be in another module. Restore the TOC -;; pointer (r2) after the call unless this is System V. 
-;; Operand2 is nonzero if we are using the V.4 calling sequence and -;; either the function was not prototyped, or it was prototyped as a -;; variable argument function. It is > 0 if FP registers were passed -;; and < 0 if they were not. - -(define_insn "*call_nonlocal_aix32" - [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "s")) - (match_operand 1 "" "g")) - (use (match_operand:SI 2 "immediate_operand" "O")) - (clobber (reg:SI LR_REGNO))] - "TARGET_32BIT - && DEFAULT_ABI == ABI_AIX - && (INTVAL (operands[2]) & CALL_LONG) == 0" - "bl %z0\;nop" - [(set_attr "type" "branch") - (set_attr "length" "8")]) - -(define_insn "*call_nonlocal_aix64" - [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s")) - (match_operand 1 "" "g")) - (use (match_operand:SI 2 "immediate_operand" "O")) - (clobber (reg:SI LR_REGNO))] - "TARGET_64BIT - && DEFAULT_ABI == ABI_AIX - && (INTVAL (operands[2]) & CALL_LONG) == 0" - "bl %z0\;nop" - [(set_attr "type" "branch") - (set_attr "length" "8")]) - -(define_insn "*call_value_nonlocal_aix32" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "s")) - (match_operand 2 "" "g"))) - (use (match_operand:SI 3 "immediate_operand" "O")) - (clobber (reg:SI LR_REGNO))] - "TARGET_32BIT - && DEFAULT_ABI == ABI_AIX - && (INTVAL (operands[3]) & CALL_LONG) == 0" - "bl %z1\;nop" - [(set_attr "type" "branch") - (set_attr "length" "8")]) - -(define_insn "*call_value_nonlocal_aix64" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s")) - (match_operand 2 "" "g"))) - (use (match_operand:SI 3 "immediate_operand" "O")) - (clobber (reg:SI LR_REGNO))] - "TARGET_64BIT - && DEFAULT_ABI == ABI_AIX - && (INTVAL (operands[3]) & CALL_LONG) == 0" - "bl %z1\;nop" - [(set_attr "type" "branch") - (set_attr "length" "8")]) ;; A function pointer under System V is just a normal pointer ;; operands[0] is the function pointer @@ -12576,6 +12419,104 @@ [(set_attr "type" "branch,branch") (set_attr "length" "4,8")]) + +;; Call to AIX abi function in the same module. + +(define_insn "*call_local_aix<mode>" + [(call (mem:SI (match_operand:P 0 "current_file_function_operand" "s")) + (match_operand 1 "" "g")) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z0" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*call_value_local_aix<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "current_file_function_operand" "s")) + (match_operand 2 "" "g"))) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;; Call to AIX abi function which may be in another module. +;; Restore the TOC pointer (r2) after the call. + +(define_insn "*call_nonlocal_aix<mode>" + [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s")) + (match_operand 1 "" "g")) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z0\;nop" + [(set_attr "type" "branch") + (set_attr "length" "8")]) + +(define_insn "*call_value_nonlocal_aix<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s")) + (match_operand 2 "" "g"))) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z1\;nop" + [(set_attr "type" "branch") + (set_attr "length" "8")]) + +;; Call to indirect functions with the AIX abi using a 3 word descriptor. 
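;; (Sketch, not part of the patch: the descriptor can be pictured as the
;;  hypothetical  struct fdesc { void *entry; void *toc; void *chain; };
;;  the templates below load r2 from the toc word, branch to the entry
;;  point through CTR or LR, and reload the caller's r2 from its stack
;;  save slot afterwards.)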
+;; Operand0 is the address of the function to call
+;; Operand2 is the location in the function descriptor to load r2 from
+;; Operand3 is the stack location to hold the current TOC pointer
+
+(define_insn "*call_indirect_aix<mode>"
+  [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
+	 (match_operand 1 "" "g,g"))
+   (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
+   (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
+   (clobber (reg:P LR_REGNO))]
+  "DEFAULT_ABI == ABI_AIX"
+  "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "12")])
+
+(define_insn "*call_value_indirect_aix<mode>"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
+	      (match_operand 2 "" "g,g")))
+   (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
+   (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
+   (clobber (reg:P LR_REGNO))]
+  "DEFAULT_ABI == ABI_AIX"
+  "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "12")])
+
+;; Call to indirect functions with the ELFv2 ABI.
+;; Operand0 is the address of the function to call
+;; Operand2 is the stack location to hold the current TOC pointer
+
+(define_insn "*call_indirect_elfv2<mode>"
+  [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
+	 (match_operand 1 "" "g,g"))
+   (set (reg:P TOC_REGNUM) (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
+   (clobber (reg:P LR_REGNO))]
+  "DEFAULT_ABI == ABI_ELFv2"
+  "b%T0l\;<ptrload> 2,%2"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "8")])
+
+(define_insn "*call_value_indirect_elfv2<mode>"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
+	      (match_operand 2 "" "g,g")))
+   (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
+   (clobber (reg:P LR_REGNO))]
+  "DEFAULT_ABI == ABI_ELFv2"
+  "b%T1l\;<ptrload> 2,%3"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "8")])
+
+
 ;; Call subroutine returning any type. 
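;; (Note, not part of the patch: GCC instantiates this expander for
;;  __builtin_apply.  A hedged example, with illustrative names and an
;;  illustrative 128-byte argument-block size:
;;      void dispatch (void (*f) ())
;;      { __builtin_apply (f, __builtin_apply_args (), 128); })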
(define_expand "untyped_call" [(parallel [(call (match_operand 0 "" "") @@ -12623,6 +12564,39 @@ gcc_assert (GET_CODE (operands[1]) == CONST_INT); operands[0] = XEXP (operands[0], 0); + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_sibcall_aix (NULL_RTX, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (mem:SI (match_operand 1 "address_operand" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (use (reg:SI LR_REGNO)) + (simple_return)])] + "" + " +{ +#if TARGET_MACHO + if (MACHOPIC_INDIRECT) + operands[1] = machopic_indirect_call_target (operands[1]); +#endif + + gcc_assert (GET_CODE (operands[1]) == MEM); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + + operands[1] = XEXP (operands[1], 0); + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_sibcall_aix (operands[0], operands[1], operands[2], operands[3]); + DONE; + } }") ;; this and similar patterns must be marked as using LR, otherwise @@ -12690,7 +12664,6 @@ [(set_attr "type" "branch") (set_attr "length" "4,8")]) - (define_insn "*sibcall_value_local64" [(set (match_operand 0 "" "") (call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s")) @@ -12712,35 +12685,6 @@ [(set_attr "type" "branch") (set_attr "length" "4,8")]) -(define_insn "*sibcall_nonlocal_aix<mode>" - [(call (mem:SI (match_operand:P 0 "call_operand" "s,c")) - (match_operand 1 "" "g,g")) - (use (match_operand:SI 2 "immediate_operand" "O,O")) - (use (reg:SI LR_REGNO)) - (simple_return)] - "DEFAULT_ABI == ABI_AIX - && (INTVAL (operands[2]) & CALL_LONG) == 0" - "@ - b %z0 - b%T0" - [(set_attr "type" "branch") - (set_attr "length" "4")]) - -(define_insn "*sibcall_value_nonlocal_aix<mode>" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:P 1 "call_operand" "s,c")) - (match_operand 2 "" "g,g"))) - (use (match_operand:SI 3 "immediate_operand" "O,O")) - (use (reg:SI LR_REGNO)) - (simple_return)] - "DEFAULT_ABI == ABI_AIX - && (INTVAL (operands[3]) & CALL_LONG) == 0" - "@ - b %z1 - b%T1" - [(set_attr "type" "branch") - (set_attr "length" "4")]) - (define_insn "*sibcall_nonlocal_sysv<mode>" [(call (mem:SI (match_operand:P 0 "call_operand" "s,s,c,c")) (match_operand 1 "" "")) @@ -12771,27 +12715,6 @@ [(set_attr "type" "branch") (set_attr "length" "4,8,4,8")]) -(define_expand "sibcall_value" - [(parallel [(set (match_operand 0 "register_operand" "") - (call (mem:SI (match_operand 1 "address_operand" "")) - (match_operand 2 "" ""))) - (use (match_operand 3 "" "")) - (use (reg:SI LR_REGNO)) - (simple_return)])] - "" - " -{ -#if TARGET_MACHO - if (MACHOPIC_INDIRECT) - operands[1] = machopic_indirect_call_target (operands[1]); -#endif - - gcc_assert (GET_CODE (operands[1]) == MEM); - gcc_assert (GET_CODE (operands[2]) == CONST_INT); - - operands[1] = XEXP (operands[1], 0); -}") - (define_insn "*sibcall_value_nonlocal_sysv<mode>" [(set (match_operand 0 "" "") (call (mem:SI (match_operand:P 1 "call_operand" "s,s,c,c")) @@ -12823,6 +12746,31 @@ [(set_attr "type" "branch") (set_attr "length" "4,8,4,8")]) +;; AIX ABI sibling call patterns. 
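;; (A hedged C example of a call these patterns can match, added for
;;  clarity, names illustrative:
;;      int callee (int);
;;      int caller (int x) { return callee (x + 1); }
;;  With -foptimize-sibling-calls such a tail call can become a bare
;;  "b callee" with no LR save, since the callee returns directly to
;;  caller's own caller.)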
+ +(define_insn "*sibcall_aix<mode>" + [(call (mem:SI (match_operand:P 0 "call_operand" "s,c")) + (match_operand 1 "" "g,g")) + (simple_return)] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "@ + b %z0 + b%T0" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_aix<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "call_operand" "s,c")) + (match_operand 2 "" "g,g"))) + (simple_return)] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "@ + b %z1 + b%T1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + (define_expand "sibcall_epilogue" [(use (const_int 0))] "" @@ -15051,6 +14999,14 @@ "mfcr %0" [(set_attr "type" "mfcr")]) +(define_insn "*crsave" + [(match_parallel 0 "crsave_operation" + [(set (match_operand:SI 1 "memory_operand" "=m") + (match_operand:SI 2 "gpc_reg_operand" "r"))])] + "" + "stw %2,%1" + [(set_attr "type" "store")]) + (define_insn "*stmw" [(match_parallel 0 "stmw_operation" [(set (match_operand:SI 1 "memory_operand" "=m") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 94e4b3883dc..347f76e207c 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -369,6 +369,14 @@ mabi=no-spe Target RejectNegative Var(rs6000_spe_abi, 0) Do not use the SPE ABI extensions +mabi=elfv1 +Target RejectNegative Var(rs6000_elf_abi, 1) Save +Use the ELFv1 ABI + +mabi=elfv2 +Target RejectNegative Var(rs6000_elf_abi, 2) +Use the ELFv2 ABI + ; These are here for testing during development only, do not document ; in the manual please. diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h index 0cc8ffb8cb3..73c3ec16c2c 100644 --- a/gcc/config/rs6000/sysv4.h +++ b/gcc/config/rs6000/sysv4.h @@ -45,7 +45,7 @@ & (OPTION_MASK_RELOCATABLE \ | OPTION_MASK_MINIMAL_TOC)) \ && flag_pic > 1) \ - || DEFAULT_ABI == ABI_AIX) + || DEFAULT_ABI != ABI_V4) #define TARGET_BITFIELD_TYPE (! TARGET_NO_BITFIELD_TYPE) #define TARGET_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN) @@ -147,7 +147,7 @@ do { \ rs6000_sdata_name); \ } \ \ - else if (flag_pic && DEFAULT_ABI != ABI_AIX \ + else if (flag_pic && DEFAULT_ABI == ABI_V4 \ && (rs6000_sdata == SDATA_EABI \ || rs6000_sdata == SDATA_SYSV)) \ { \ @@ -173,14 +173,14 @@ do { \ error ("-mrelocatable and -mno-minimal-toc are incompatible"); \ } \ \ - if (TARGET_RELOCATABLE && rs6000_current_abi == ABI_AIX) \ + if (TARGET_RELOCATABLE && rs6000_current_abi != ABI_V4) \ { \ rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ error ("-mrelocatable and -mcall-%s are incompatible", \ rs6000_abi_name); \ } \ \ - if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi == ABI_AIX) \ + if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi != ABI_V4) \ { \ flag_pic = 0; \ error ("-fPIC and -mcall-%s are incompatible", \ @@ -193,7 +193,7 @@ do { \ } \ \ /* Treat -fPIC the same as -mrelocatable. */ \ - if (flag_pic > 1 && DEFAULT_ABI != ABI_AIX) \ + if (flag_pic > 1 && DEFAULT_ABI == ABI_V4) \ { \ rs6000_isa_flags |= OPTION_MASK_RELOCATABLE | OPTION_MASK_MINIMAL_TOC; \ TARGET_NO_FP_IN_TOC = 1; \ @@ -317,7 +317,7 @@ do { \ /* Put PC relative got entries in .got2. */ #define MINIMAL_TOC_SECTION_ASM_OP \ - (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI != ABI_AIX) \ + (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI == ABI_V4) \ ? 
"\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"") #define SDATA_SECTION_ASM_OP "\t.section\t\".sdata\",\"aw\"" @@ -538,12 +538,7 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) #define CC1_ENDIAN_BIG_SPEC "" -#define CC1_ENDIAN_LITTLE_SPEC "\ -%{!mstrict-align: %{!mno-strict-align: \ - %{!mcall-i960-old: \ - -mstrict-align \ - } \ -}}" +#define CC1_ENDIAN_LITTLE_SPEC "" #define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_big)" diff --git a/gcc/config/rs6000/sysv4le.h b/gcc/config/rs6000/sysv4le.h index 3901122a738..ba56004cdfb 100644 --- a/gcc/config/rs6000/sysv4le.h +++ b/gcc/config/rs6000/sysv4le.h @@ -34,3 +34,7 @@ #undef MULTILIB_DEFAULTS #define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" } + +/* Little-endian PowerPC64 Linux uses the ELF v2 ABI by default. */ +#define LINUX64_DEFAULT_ABI_ELFv2 + diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 68214d91316..9d2bcc1ba48 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -108,6 +108,7 @@ if (!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && <MODE>mode != TImode + && !gpr_or_gpr_p (operands[0], operands[1]) && (memory_operand (operands[0], <MODE>mode) ^ memory_operand (operands[1], <MODE>mode))) { @@ -830,12 +831,7 @@ emit_insn (gen_vsx_xvcvdpsp (r1, operands[1])); emit_insn (gen_vsx_xvcvdpsp (r2, operands[2])); - - if (BYTES_BIG_ENDIAN) - rs6000_expand_extract_even (operands[0], r1, r2); - else - rs6000_expand_extract_even (operands[0], r2, r1); - + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -850,12 +846,7 @@ emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1])); emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2])); - - if (BYTES_BIG_ENDIAN) - rs6000_expand_extract_even (operands[0], r1, r2); - else - rs6000_expand_extract_even (operands[0], r2, r1); - + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -870,12 +861,7 @@ emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1])); emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2])); - - if (BYTES_BIG_ENDIAN) - rs6000_expand_extract_even (operands[0], r1, r2); - else - rs6000_expand_extract_even (operands[0], r2, r1); - + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -966,8 +952,8 @@ operands[2], operands[3])); else { - /* Avoid the "subtract from splat31" workaround for vperm since - we have changed lvsr to lvsl instead. */ + /* We have changed lvsr to lvsl, so to complete the transformation + of vperm for LE, we must swap the inputs. */ rtx unspec = gen_rtx_UNSPEC (<MODE>mode, gen_rtvec (3, operands[2], operands[1], operands[3]), diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 8a51afb3c86..977ef963411 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1497,9 +1497,10 @@ UNSPEC_VSX_SET))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - if (INTVAL (operands[3]) == 0) + int idx_first = BYTES_BIG_ENDIAN ? 
0 : 1; + if (INTVAL (operands[3]) == idx_first) return \"xxpermdi %x0,%x2,%x1,1\"; - else if (INTVAL (operands[3]) == 1) + else if (INTVAL (operands[3]) == 1 - idx_first) return \"xxpermdi %x0,%x1,%x2,0\"; else gcc_unreachable (); @@ -1514,8 +1515,12 @@ [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))] "VECTOR_MEM_VSX_P (<MODE>mode)" { + int fldDM; gcc_assert (UINTVAL (operands[2]) <= 1); - operands[3] = GEN_INT (INTVAL (operands[2]) << 1); + fldDM = INTVAL (operands[2]) << 1; + if (!BYTES_BIG_ENDIAN) + fldDM = 3 - fldDM; + operands[3] = GEN_INT (fldDM); return \"xxpermdi %x0,%x1,%x1,%3\"; } [(set_attr "type" "vecperm")]) @@ -1535,6 +1540,21 @@ (const_string "fpload"))) (set_attr "length" "4")]) +;; Optimize extracting element 1 from memory for little endian +(define_insn "*vsx_extract_<mode>_one_le" + [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa") + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z") + (parallel [(const_int 1)])))] + "VECTOR_MEM_VSX_P (<MODE>mode) && !WORDS_BIG_ENDIAN" + "lxsd%U1x %x0,%y1" + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "fpload_ux") + (const_string "fpload"))) + (set_attr "length" "4")]) + ;; Extract a SF element from V4SF (define_insn_and_split "vsx_extract_v4sf" [(set (match_operand:SF 0 "vsx_register_operand" "=f,f") @@ -1555,7 +1575,7 @@ rtx op2 = operands[2]; rtx op3 = operands[3]; rtx tmp; - HOST_WIDE_INT ele = INTVAL (op2); + HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2); if (ele == 0) tmp = op1; diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index 90d89bbc5e1..59640d8c8fa 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -27,6 +27,9 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" @@ -344,9 +347,9 @@ rx_mode_dependent_address_p (const_rtx addr, addr_space_t as ATTRIBUTE_UNUSED) case CONST_INT: /* REG+INT is only mode independent if INT is a - multiple of 4, positive and will fit into 8-bits. */ + multiple of 4, positive and will fit into 16-bits. */ if (((INTVAL (addr) & 3) == 0) - && IN_RANGE (INTVAL (addr), 4, 252)) + && IN_RANGE (INTVAL (addr), 4, 0xfffc)) return false; return true; diff --git a/gcc/config/s390/htmxlintrin.h b/gcc/config/s390/htmxlintrin.h index 800d5f0aa0c..d1c7ec566e1 100644 --- a/gcc/config/s390/htmxlintrin.h +++ b/gcc/config/s390/htmxlintrin.h @@ -33,13 +33,20 @@ extern "C" { the IBM XL compiler. For documentation please see the "z/OS XL C/C++ Programming Guide" publicly available on the web. */ -extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +/* FIXME: __TM_simple_begin and __TM_begin should be marked + __always_inline__ as well but this currently produces an error + since the tbegin builtins are "returns_twice" and setjmp_call_p + (calls.c) therefore identifies the functions as calling setjmp. + The tree inliner currently refuses to inline functions calling + setjmp. 
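   (Illustration of the failure mode, hypothetical caller, not part of
   the patch:

       int f (void)
       {
         if (__TM_simple_begin () == 0)
           { ... transactional work; 0 means the transaction started ... }
         return 0;
       }

   With __always_inline__ the wrapper would have to be inlined into f,
   but the returns_twice tbegin call inside it makes setjmp_call_p
   treat the wrapper as a setjmp caller, which the tree inliner
   rejects.)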
*/ + +long __TM_simple_begin () { return __builtin_tbegin_nofloat (0); } -extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +long __TM_begin (void* const tdb) { return __builtin_tbegin_nofloat (tdb); @@ -78,7 +85,7 @@ __TM_nesting_depth (void* const tdb_ptr) if (depth != 0) return depth; - if (tdb->format == 0) + if (tdb->format != 1) return 0; return tdb->nesting_depth; } @@ -90,7 +97,7 @@ __TM_is_user_abort (void* const tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; - if (tdb->format == 0) + if (tdb->format != 1) return 0; return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE); @@ -101,7 +108,7 @@ __TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code) { struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; - if (tdb->format == 0) + if (tdb->format != 1) return 0; if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE) @@ -117,7 +124,7 @@ __TM_is_illegal (void* const tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; - return (tdb->format == 0 + return (tdb->format == 1 && (tdb->abort_code == 4 /* unfiltered program interruption */ || tdb->abort_code == 11 /* restricted instruction */)); } @@ -127,7 +134,7 @@ __TM_is_footprint_exceeded (void* const tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; - return (tdb->format == 0 + return (tdb->format == 1 && (tdb->abort_code == 7 /* fetch overflow */ || tdb->abort_code == 8 /* store overflow */)); } @@ -137,7 +144,7 @@ __TM_is_nested_too_deep (void* const tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; - return tdb->format == 0 && tdb->abort_code == 13; /* depth exceeded */ + return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */ } extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -145,7 +152,7 @@ __TM_is_conflict (void* const tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; - return (tdb->format == 0 + return (tdb->format == 1 && (tdb->abort_code == 9 /* fetch conflict */ || tdb->abort_code == 10 /* store conflict */)); } diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index f0d6a59e978..62d162ab087 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -26,6 +26,11 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "rtl.h" #include "tree.h" +#include "print-tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" #include "tm_p.h" #include "regs.h" #include "hard-reg-set.h" @@ -48,6 +53,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "optabs.h" #include "gimple.h" +#include "gimplify.h" #include "df.h" #include "params.h" #include "cfgloop.h" @@ -894,7 +900,8 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, { /* For CCRAWmode put the required cc mask into the second operand. 
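	 (Worked values, added for clarity: with the usual CC0..CC3
	 mask-bit encoding of 8, 4, 2, 1 (cf. "const int CC3 = 1 << 0"
	 later in this patch), an *op1 of 0 becomes 1 << (3 - 0) = 8
	 and an *op1 of 3 becomes 1 << 0 = 1; the added 0..3 range
	 check keeps the shift amount in bounds.)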
*/ - if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode) + if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode + && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3) *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1))); *op0 = XVECEXP (*op0, 0, 0); *code = new_code; @@ -7963,10 +7970,13 @@ s390_optimize_nonescaping_tx (void) if (!cfun->machine->tbegin_p) return; - for (bb_index = 0; bb_index < n_basic_blocks; bb_index++) + for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++) { bb = BASIC_BLOCK (bb_index); + if (!bb) + continue; + FOR_BB_INSNS (bb, insn) { rtx ite, cc, pat, target; @@ -8080,7 +8090,10 @@ s390_optimize_nonescaping_tx (void) if (!result) return; - PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0); + PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, + XVECEXP (PATTERN (tbegin_insn), 0, 0), + XVECEXP (PATTERN (tbegin_insn), 0, 1))); INSN_CODE (tbegin_insn) = -1; df_insn_rescan (tbegin_insn); @@ -9792,6 +9805,7 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) const int CC3 = 1 << 0; rtx abort_label = gen_label_rtx (); rtx leave_label = gen_label_rtx (); + rtx retry_plus_two = gen_reg_rtx (SImode); rtx retry_reg = gen_reg_rtx (SImode); rtx retry_label = NULL_RTX; rtx jump; @@ -9800,16 +9814,17 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) if (retry != NULL_RTX) { emit_move_insn (retry_reg, retry); + emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx)); + emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx)); retry_label = gen_label_rtx (); emit_label (retry_label); } if (clobber_fprs_p) - emit_insn (gen_tbegin_1 (tdb, - gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb)); else - emit_insn (gen_tbegin_nofloat_1 (tdb, - gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), + tdb)); jump = s390_emit_jump (abort_label, gen_rtx_NE (VOIDmode, @@ -9830,6 +9845,10 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) /* Abort handler code. */ emit_label (abort_label); + emit_move_insn (dest, gen_rtx_UNSPEC (SImode, + gen_rtvec (1, gen_rtx_REG (CCRAWmode, + CC_REGNUM)), + UNSPEC_CC_TO_INT)); if (retry != NULL_RTX) { rtx count = gen_reg_rtx (SImode); @@ -9841,7 +9860,7 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) add_int_reg_note (jump, REG_BR_PROB, very_unlikely); /* CC2 - transient failure. Perform retry with ppa. 
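	 (Trace of the counting set up at the top of this function,
	 added for clarity: retry_reg starts at retry+1 and
	 retry_plus_two at retry+2, so the first transient abort
	 computes count = retry_plus_two - retry_reg = 1; each
	 doloop_si64 iteration then decrements retry_reg, so the assist
	 count passed to ppa grows by one per failed attempt.)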
*/ - emit_move_insn (count, retry); + emit_move_insn (count, retry_plus_two); emit_insn (gen_subsi3 (count, count, retry_reg)); emit_insn (gen_tx_assist (count)); jump = emit_jump_insn (gen_doloop_si64 (retry_label, @@ -9851,10 +9870,6 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) LABEL_NUSES (retry_label) = 1; } - emit_move_insn (dest, gen_rtx_UNSPEC (SImode, - gen_rtvec (1, gen_rtx_REG (CCRAWmode, - CC_REGNUM)), - UNSPEC_CC_TO_INT)); emit_label (leave_label); } @@ -9893,6 +9908,9 @@ static void s390_init_builtins (void) { tree ftype, uint64_type; + tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"), + NULL, NULL); + tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL); /* void foo (void) */ ftype = build_function_type_list (void_type_node, NULL_TREE); @@ -9903,17 +9921,17 @@ s390_init_builtins (void) ftype = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); add_builtin_function ("__builtin_tabort", ftype, - S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE); + S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr); add_builtin_function ("__builtin_tx_assist", ftype, S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE); /* int foo (void *) */ ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE); add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN, - BUILT_IN_MD, NULL, NULL_TREE); + BUILT_IN_MD, NULL, returns_twice_attr); add_builtin_function ("__builtin_tbegin_nofloat", ftype, S390_BUILTIN_TBEGIN_NOFLOAT, - BUILT_IN_MD, NULL, NULL_TREE); + BUILT_IN_MD, NULL, returns_twice_attr); /* int foo (void *, int) */ ftype = build_function_type_list (integer_type_node, ptr_type_node, @@ -9921,11 +9939,11 @@ s390_init_builtins (void) add_builtin_function ("__builtin_tbegin_retry", ftype, S390_BUILTIN_TBEGIN_RETRY, BUILT_IN_MD, - NULL, NULL_TREE); + NULL, returns_twice_attr); add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype, S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, BUILT_IN_MD, - NULL, NULL_TREE); + NULL, returns_twice_attr); /* int foo (void) */ ftype = build_function_type_list (integer_type_node, NULL_TREE); @@ -10192,9 +10210,9 @@ s390_encode_section_info (tree decl, rtx rtl, int first) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1; if (!DECL_SIZE (decl) || !DECL_ALIGN (decl) - || !host_integerp (DECL_SIZE (decl), 0) + || !tree_fits_shwi_p (DECL_SIZE (decl)) || (DECL_ALIGN (decl) <= 64 - && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0))) + && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl)))) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED; } diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 8354e263892..d537d29d24f 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -155,6 +155,7 @@ ; Transactional Execution support UNSPECV_TBEGIN + UNSPECV_TBEGIN_TDB UNSPECV_TBEGINC UNSPECV_TEND UNSPECV_TABORT @@ -9997,9 +9998,10 @@ (define_insn "tbegin_1" [(set (reg:CCRAW CC_REGNUM) - (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") - (match_operand 1 "const_int_operand" " D")] + (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")] UNSPECV_TBEGIN)) + (set (match_operand:BLK 1 "memory_operand" "=Q") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_TBEGIN_TDB)) (clobber (reg:DF 16)) (clobber (reg:DF 17)) (clobber (reg:DF 18)) @@ -10018,18 +10020,19 @@ (clobber (reg:DF 31))] ; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is ; not supposed to be 
used for immediates (see genpreds.c). - "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" - "tbegin\t%0,%x1" + "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff" + "tbegin\t%1,%x0" [(set_attr "op_type" "SIL")]) ; Same as above but without the FPR clobbers (define_insn "tbegin_nofloat_1" [(set (reg:CCRAW CC_REGNUM) - (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") - (match_operand 1 "const_int_operand" " D")] - UNSPECV_TBEGIN))] - "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" - "tbegin\t%0,%x1" + (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")] + UNSPECV_TBEGIN)) + (set (match_operand:BLK 1 "memory_operand" "=Q") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_TBEGIN_TDB))] + "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff" + "tbegin\t%1,%x0" [(set_attr "op_type" "SIL")]) @@ -10113,15 +10116,12 @@ ; Transaction perform processor assist (define_expand "tx_assist" - [(set (match_dup 1) (const_int 0)) - (unspec_volatile [(match_operand:SI 0 "register_operand" "") - (match_dup 1) + [(unspec_volatile [(match_operand:SI 0 "register_operand" "") + (reg:SI GPR0_REGNUM) (const_int 1)] UNSPECV_PPA)] "TARGET_HTM" -{ - operands[1] = gen_reg_rtx (SImode); -}) + "") (define_insn "*ppa" [(unspec_volatile [(match_operand:SI 0 "register_operand" "d") @@ -10129,5 +10129,5 @@ (match_operand 2 "const_int_operand" "I")] UNSPECV_PPA)] "TARGET_HTM && INTVAL (operands[2]) < 16" - "ppa\t%0,%1,1" + "ppa\t%0,%1,%2" [(set_attr "op_type" "RRF")]) diff --git a/gcc/config/score/score.c b/gcc/config/score/score.c index c25aaa2da93..3fdf2ea9050 100644 --- a/gcc/config/score/score.c +++ b/gcc/config/score/score.c @@ -32,6 +32,10 @@ #include "diagnostic-core.h" #include "output.h" #include "tree.h" +#include "stringpool.h" +#include "calls.h" +#include "varasm.h" +#include "stor-layout.h" #include "function.h" #include "expr.h" #include "optabs.h" diff --git a/gcc/config/sh/sh-c.c b/gcc/config/sh/sh-c.c index 4f3a41a46e7..0d7937f4822 100644 --- a/gcc/config/sh/sh-c.c +++ b/gcc/config/sh/sh-c.c @@ -23,6 +23,8 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stringpool.h" +#include "attribs.h" #include "tm_p.h" #include "cpplib.h" #include "c-family/c-common.h" diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 167b615e37b..6981ab6229a 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -26,6 +26,10 @@ along with GCC; see the file COPYING3. If not see #include "insn-config.h" #include "rtl.h" #include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" #include "flags.h" #include "expr.h" #include "optabs.h" @@ -49,6 +53,7 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "ggc.h" #include "gimple.h" +#include "gimplify.h" #include "cfgloop.h" #include "alloc-pool.h" #include "tm-constrs.h" @@ -715,7 +720,9 @@ got_mode_name:; /* Register SH specific RTL passes. */ extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns, - const char* name); + const char* name); +extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx, + const char* name); static void register_sh_passes (void) { @@ -739,6 +746,13 @@ register_sh_passes (void) reordering as this sometimes creates new opportunities. 
*/ register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"), PASS_POS_INSERT_AFTER, "split4", 1); + + /* Optimize sett and clrt insns, by e.g. removing them if the T bit value + is known after a conditional branch. + This must be done after basic blocks and branch conditions have + stabilized and won't be changed by further passes. */ + register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"), + PASS_POS_INSERT_BEFORE, "sched2", 1); } /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override @@ -5774,24 +5788,21 @@ fixup_addr_diff_vecs (rtx first) int barrier_align (rtx barrier_or_label) { - rtx next = next_active_insn (barrier_or_label), pat, prev; + rtx next, pat; - if (! next) + if (! barrier_or_label) return 0; - pat = PATTERN (next); - - if (GET_CODE (pat) == ADDR_DIFF_VEC) + if (LABEL_P (barrier_or_label) + && NEXT_INSN (barrier_or_label) + && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label))) return 2; - if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN) - /* This is a barrier in front of a constant table. */ - return 0; - - prev = prev_active_insn (barrier_or_label); - if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) + if (BARRIER_P (barrier_or_label) + && PREV_INSN (barrier_or_label) + && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label))) { - pat = PATTERN (prev); + pat = PATTERN (PREV_INSN (barrier_or_label)); /* If this is a very small table, we want to keep the alignment after the table to the minimum for proper code alignment. */ return ((optimize_size @@ -5800,6 +5811,17 @@ barrier_align (rtx barrier_or_label) ? 1 << TARGET_SHMEDIA : align_jumps_log); } + next = next_active_insn (barrier_or_label); + + if (! next) + return 0; + + pat = PATTERN (next); + + if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN) + /* This is a barrier in front of a constant table. */ + return 0; + if (optimize_size) return 0; @@ -5824,13 +5846,12 @@ barrier_align (rtx barrier_or_label) (fill_eager_delay_slots) and the branch is to the insn after the insn after the barrier. */ - /* PREV is presumed to be the JUMP_INSN for the barrier under - investigation. Skip to the insn before it. */ - int slot, credit; bool jump_to_next = false; - prev = prev_real_insn (prev); + /* Skip to the insn before the JUMP_INSN before the barrier under + investigation. */ + rtx prev = prev_real_insn (prev_active_insn (barrier_or_label)); for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2; credit >= 0 && prev && NONJUMP_INSN_P (prev); diff --git a/gcc/config/sh/sh_optimize_sett_clrt.cc b/gcc/config/sh/sh_optimize_sett_clrt.cc new file mode 100644 index 00000000000..abb75672e86 --- /dev/null +++ b/gcc/config/sh/sh_optimize_sett_clrt.cc @@ -0,0 +1,453 @@ +/* An SH specific RTL pass that tries to optimize clrt and sett insns. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "machmode.h" +#include "basic-block.h" +#include "df.h" +#include "rtl.h" +#include "insn-config.h" +#include "tree-pass.h" +#include "target.h" + +#include <vector> + +/* +This pass tries to eliminate unnecessary sett or clrt instructions in cases +where the ccreg value is already known to be the same as the constant set +would set it to. This is done as follows: + +Check every BB's insn and see if it's a sett or clrt. +Once a sett or clrt insn is hit, walk insns and predecessor basic blocks +backwards from that insn and determine all possible ccreg values from all +basic block paths. +Insns that set the ccreg value in some way (simple set, clobber etc) are +recorded. Conditional branches where one edge leads to the sett / clrt insn +are also recorded, since for each edge in the conditional branch the ccreg +value is known constant. +After collecting all possible ccreg values at the sett / clrt insn, check that +all the values are the same. If that value is the same as the sett / clrt +insn would set the ccreg to, the sett / clrt insn can be eliminated. +*/ + + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// Helper functions + +#define log_msg(...)\ + do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); } while (0) + +#define log_insn(i)\ + do { if (dump_file != NULL) print_rtl_single (dump_file, \ + (const_rtx)i); } while (0) + +#define log_rtx(r)\ + do { if (dump_file != NULL) print_rtl (dump_file, (const_rtx)r); } while (0) + +#define log_return(retval, ...)\ + do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \ + return retval; } while (0) + +#define log_return_void(...)\ + do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \ + return; } while (0) + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// RTL pass class + +class sh_optimize_sett_clrt : public rtl_opt_pass +{ +public: + sh_optimize_sett_clrt (gcc::context* ctx, const char* name); + virtual ~sh_optimize_sett_clrt (void); + virtual bool gate (void); + virtual unsigned int execute (void); + +private: + static const pass_data default_pass_data; + + struct ccreg_value + { + // The insn at which the ccreg value was determined. + rtx insn; + + // The basic block where the insn was discovered. + // Notice that the CFG might be invalid at late RTL stages and + // BLOCK_FOR_INSN might return null. Thus the basic block are recorded + // here while traversing them. + basic_block bb; + + // The value of ccreg. If NULL_RTX, the value is not known, e.g. if the + // ccreg is clobbered. + rtx value; + }; + + // Update the mode of the captured m_ccreg with the specified mode. + void update_ccreg_mode (machine_mode m); + + // Given an insn pattern, check if it sets the ccreg to a constant value + // of either zero or STORE_FLAG_VALUE. If so, return the value rtx, + // NULL_RTX otherwise. + rtx const_setcc_value (rtx pat) const; + + // Given a start insn and its basic block, recursively determine all + // possible ccreg values in all basic block paths that can lead to the + // start insn. + void find_last_ccreg_values (rtx start_insn, basic_block bb, + std::vector<ccreg_value>& values_out, + basic_block prev_visited_bb = NULL) const; + + // Given a cbranch insn, its basic block and another basic block, determine + // the value to which the ccreg will be set after jumping/falling through to + // the specified target basic block. 
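  // (Illustration, not in the patch: for a conditional branch of the
  //  form
  //      if (ccreg != 0) goto label;   // GET_CODE (cond) == NE
  //  the branch edge implies ccreg == STORE_FLAG_VALUE and the
  //  fallthrough edge implies ccreg == 0, which is what the
  //  implementation decodes from BRANCH_EDGE / FALLTHRU_EDGE.)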
+  bool sh_cbranch_ccreg_value (rtx cbranch_insn,
+			       basic_block cbranch_insn_bb,
+			       basic_block branch_target_bb) const;
+
+  // Check whether all of the ccreg values are the same.
+  static bool all_ccreg_values_equal (const std::vector<ccreg_value>& values);
+
+  // Remove REG_DEAD and REG_UNUSED notes from insns of the specified
+  // ccreg_value entries.
+  void remove_ccreg_dead_unused_notes (std::vector<ccreg_value>& values) const;
+
+  // rtx of the ccreg that is obtained from the target.
+  rtx m_ccreg;
+};
+
+const pass_data sh_optimize_sett_clrt::default_pass_data =
+{
+  RTL_PASS,		// type
+  "",			// name (overwritten by the constructor)
+  OPTGROUP_NONE,	// optinfo_flags
+  true,			// has_gate
+  true,			// has_execute
+  TV_OPTIMIZE,		// tv_id
+  0,			// properties_required
+  0,			// properties_provided
+  0,			// properties_destroyed
+  0,			// todo_flags_start
+  0			// todo_flags_finish
+};
+
+sh_optimize_sett_clrt::sh_optimize_sett_clrt (gcc::context* ctx,
+					      const char* name)
+: rtl_opt_pass (default_pass_data, ctx),
+  m_ccreg (NULL_RTX)
+{
+  // Overwrite default name in pass_data base class.
+  this->name = name;
+}
+
+sh_optimize_sett_clrt::~sh_optimize_sett_clrt (void)
+{
+}
+
+bool
+sh_optimize_sett_clrt::gate (void)
+{
+  return optimize > 0;
+}
+
+unsigned int
+sh_optimize_sett_clrt::execute (void)
+{
+  unsigned int ccr0 = INVALID_REGNUM;
+  unsigned int ccr1 = INVALID_REGNUM;
+
+  if (targetm.fixed_condition_code_regs (&ccr0, &ccr1)
+      && ccr0 != INVALID_REGNUM)
+    {
+      // Initially create a reg rtx with VOIDmode.
+      // When the constant setcc is discovered, the mode is changed
+      // to the mode that is actually used by the target.
+      m_ccreg = gen_rtx_REG (VOIDmode, ccr0);
+    }
+
+  if (m_ccreg == NULL_RTX)
+    log_return (0, "no ccreg.\n\n");
+
+  if (STORE_FLAG_VALUE != 1)
+    log_return (0, "unsupported STORE_FLAG_VALUE %d", STORE_FLAG_VALUE);
+
+  log_msg ("ccreg: ");
+  log_rtx (m_ccreg);
+  log_msg (" STORE_FLAG_VALUE = %d\n", STORE_FLAG_VALUE);
+
+  if (!df_regs_ever_live_p (ccr0))
+    log_return (0, "ccreg never live\n\n");
+
+  // Output vector for find_last_ccreg_values.
+  std::vector<ccreg_value> ccreg_values;
+  ccreg_values.reserve (32);
+
+  // Look for insns that set the ccreg to a constant value and see if they
+  // can be optimized.
+  basic_block bb;
+  FOR_EACH_BB_REVERSE (bb)
+    for (rtx next_i, i = NEXT_INSN (BB_HEAD (bb));
+	 i != NULL_RTX && i != BB_END (bb); i = next_i)
+      {
+	next_i = NEXT_INSN (i);
+
+	if (!INSN_P (i) || !NONDEBUG_INSN_P (i))
+	  continue;
+
+	rtx setcc_val = const_setcc_value (PATTERN (i));
+	if (setcc_val != NULL_RTX)
+	  {
+	    update_ccreg_mode (GET_MODE (XEXP (PATTERN (i), 0)));
+
+	    log_msg ("\n\nfound const setcc insn in [bb %d]: \n", bb->index);
+	    log_insn (i);
+	    log_msg ("\n");
+
+	    ccreg_values.clear ();
+	    find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values);
+
+	    log_msg ("number of ccreg values collected: %u\n",
+		     (unsigned int)ccreg_values.size ());
+
+	    // If all the collected values are equal and are equal to the
+	    // constant value of the setcc insn, the setcc insn can be
+	    // removed.
+	    if (all_ccreg_values_equal (ccreg_values)
+		&& rtx_equal_p (ccreg_values.front ().value, setcc_val))
+	      {
+		log_msg ("all values are ");
+		log_rtx (setcc_val);
+		log_msg ("\n");
+
+		delete_insn (i);
+		remove_ccreg_dead_unused_notes (ccreg_values);
+	      }
+	  }
+      }
+
+  log_return (0, "\n\n");
+}
+
+void
+sh_optimize_sett_clrt::update_ccreg_mode (machine_mode m)
+{
+  if (GET_MODE (m_ccreg) == m)
+    return;
+
+  PUT_MODE (m_ccreg, m);
+  log_msg ("updated ccreg mode: ");
+  log_rtx (m_ccreg);
+  log_msg ("\n\n");
+}
+
+rtx
+sh_optimize_sett_clrt::const_setcc_value (rtx pat) const
+{
+  if (GET_CODE (pat) == SET
+      && REG_P (XEXP (pat, 0)) && REGNO (XEXP (pat, 0)) == REGNO (m_ccreg)
+      && CONST_INT_P (XEXP (pat, 1))
+      && (INTVAL (XEXP (pat, 1)) == 0
+	  || INTVAL (XEXP (pat, 1)) == STORE_FLAG_VALUE))
+    return XEXP (pat, 1);
+  else
+    return NULL_RTX;
+}
+
+bool
+sh_optimize_sett_clrt
+::sh_cbranch_ccreg_value (rtx cbranch_insn, basic_block cbranch_insn_bb,
+			  basic_block branch_target_bb) const
+{
+  rtx pc_set_rtx = pc_set (cbranch_insn);
+  gcc_assert (pc_set_rtx != NULL_RTX);
+  gcc_assert (branch_target_bb != NULL);
+
+  rtx cond = XEXP (XEXP (pc_set_rtx, 1), 0);
+  bool branch_if;
+
+  if (GET_CODE (cond) == NE
+      && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (m_ccreg)
+      && XEXP (cond, 1) == const0_rtx)
+    branch_if = true;
+
+  else if (GET_CODE (cond) == EQ
+	   && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (m_ccreg)
+	   && XEXP (cond, 1) == const0_rtx)
+    branch_if = false;
+
+  else
+    gcc_unreachable ();
+
+  if (branch_target_bb == BRANCH_EDGE (cbranch_insn_bb)->dest)
+    return branch_if;
+  else if (branch_target_bb == FALLTHRU_EDGE (cbranch_insn_bb)->dest)
+    return !branch_if;
+  else
+    gcc_unreachable ();
+}
+
+void
+sh_optimize_sett_clrt
+::find_last_ccreg_values (rtx start_insn, basic_block bb,
+			  std::vector<ccreg_value>& values_out,
+			  basic_block prev_visited_bb) const
+{
+  if (start_insn == NULL_RTX)
+    return;
+
+  log_msg ("looking for ccreg values in [bb %d]\n", bb->index);
+
+  for (rtx i = start_insn; i != NULL_RTX && i != PREV_INSN (BB_HEAD (bb));
+       i = PREV_INSN (i))
+    {
+      if (!INSN_P (i))
+	continue;
+
+      if (reg_set_p (m_ccreg, i))
+	{
+	  const_rtx set_rtx = set_of (m_ccreg, i);
+
+	  ccreg_value v;
+	  v.insn = i;
+	  v.bb = bb;
+	  v.value = set_rtx != NULL_RTX && GET_CODE (set_rtx) == SET
+		    ? XEXP (set_rtx, 1)
+		    : NULL_RTX;
+
+	  log_msg ("found setcc in [bb %d] in insn:\n", bb->index);
+	  log_insn (i);
+	  log_msg ("\nccreg value: ");
+	  log_rtx (v.value);
+	  log_msg ("\n");
+
+	  values_out.push_back (v);
+	  return;
+	}
+
+      if (any_condjump_p (i) && onlyjump_p (i) && prev_visited_bb != NULL)
+	{
+	  // For a conditional branch the ccreg value will be a known constant
+	  // of either 0 or STORE_FLAG_VALUE after branching/falling through
+	  // to one of the two successor BBs.  Record the value for the BB
+	  // where we came from.
+	  log_msg ("found cbranch in [bb %d]:\n", bb->index);
+	  log_insn (i);
+
+	  ccreg_value v;
+	  v.insn = i;
+	  v.bb = bb;
+	  v.value = GEN_INT (sh_cbranch_ccreg_value (i, bb, prev_visited_bb));
+
+	  log_msg (" branches to [bb %d] with ccreg value ",
+		   prev_visited_bb->index);
+	  log_rtx (v.value);
+	  log_msg ("\n");
+
+	  values_out.push_back (v);
+	  return;
+	}
+    }
+
+  // If here, we've walked up all the insns of the current basic block
+  // and none of them seems to modify the ccreg.
+  // In this case, check the predecessor basic blocks.
+  unsigned int pred_bb_count = 0;
+
+  // If the current basic block is the same as the previous one, it's a loop.
+  // Don't try to recurse again, as this will result in an infinite loop.
+  if (bb != prev_visited_bb)
+    for (edge_iterator ei = ei_start (bb->preds); !ei_end_p (ei); ei_next (&ei))
+      {
+	basic_block pred_bb = ei_edge (ei)->src;
+	if (pred_bb->index == ENTRY_BLOCK)
+	  continue;
+
+	pred_bb_count += 1;
+	find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out, bb);
+      }
+
+  log_msg ("[bb %d] pred_bb_count = %u\n", bb->index, pred_bb_count);
+
+  // If here, we've walked up all the predecessor basic blocks without finding
+  // anything setcc related.  Add an entry for the last insn of the current
+  // basic block with the ccreg value being set to unknown (NULL_RTX).
+  if (pred_bb_count == 0)
+    {
+      log_msg ("unknown ccreg value for [bb %d]\n", bb->index);
+
+      ccreg_value v;
+      v.insn = BB_END (bb);
+      v.bb = bb;
+      v.value = NULL_RTX;
+
+      values_out.push_back (v);
+    }
+}
+
+bool
+sh_optimize_sett_clrt
+::all_ccreg_values_equal (const std::vector<ccreg_value>& values)
+{
+  if (values.empty ())
+    return false;
+
+  rtx last_value = values.front ().value;
+
+  // If the ccreg is modified in the insn but the exact value is not known
+  // the value rtx might be null.
+  if (last_value == NULL_RTX)
+    return false;
+
+  for (std::vector<ccreg_value>::const_iterator i = values.begin ();
+       i != values.end (); ++i)
+    if (i->value == NULL_RTX || !rtx_equal_p (last_value, i->value))
+      return false;
+
+  return true;
+}
+
+void
+sh_optimize_sett_clrt
+::remove_ccreg_dead_unused_notes (std::vector<ccreg_value>& values) const
+{
+  for (std::vector<ccreg_value>::iterator i = values.begin ();
+       i != values.end (); ++i)
+    {
+      if (i->insn == NULL_RTX)
+	continue;
+
+      rtx n = find_regno_note (i->insn, REG_DEAD, REGNO (m_ccreg));
+      if (n != NULL_RTX)
+	remove_note (i->insn, n);
+
+      n = find_regno_note (i->insn, REG_UNUSED, REGNO (m_ccreg));
+      if (n != NULL_RTX)
+	remove_note (i->insn, n);
+    }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// This allows instantiating the pass somewhere else without having to pull
+// in a header file.
+opt_pass*
+make_pass_sh_optimize_sett_clrt (gcc::context* ctx, const char* name)
+{
+  return new sh_optimize_sett_clrt (ctx, name);
+}
diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh
index 710bc873e75..405d3142253 100644
--- a/gcc/config/sh/t-sh
+++ b/gcc/config/sh/t-sh
@@ -29,6 +29,10 @@ sh_treg_combine.o: $(srcdir)/config/sh/sh_treg_combine.cc \
   $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
 
+sh_optimize_sett_clrt.o: $(srcdir)/config/sh/sh_optimize_sett_clrt.cc \
+  $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
+	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
 DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG))
 OTHER_ENDIAN = $(word 2,$(TM_ENDIAN_CONFIG))
 
diff --git a/gcc/config/sol2-c.c b/gcc/config/sol2-c.c
index ee44621591e..8254af0fd73 100644
--- a/gcc/config/sol2-c.c
+++ b/gcc/config/sol2-c.c
@@ -22,6 +22,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "tm.h"
 #include "tm_p.h"
 
diff --git a/gcc/config/sol2-cxx.c b/gcc/config/sol2-cxx.c
index e1b450d759b..d3d79554b96 100644
--- a/gcc/config/sol2-cxx.c
+++ b/gcc/config/sol2-cxx.c
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tree.h"
+#include "stringpool.h"
 #include "cp/cp-tree.h"
 #include "tm.h"
 #include "tm_p.h"
 
diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c
index 7c7c429db3d..4200e620e16 100644
--- a/gcc/config/sol2.c
+++ b/gcc/config/sol2.c
@@ -22,6 +22,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "varasm.h"
 #include "output.h"
 #include "tm.h"
 #include "rtl.h"
 
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 0eabd5b2b72..8bdc9bb6bc6 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -25,6 +25,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "varasm.h"
 #include "rtl.h"
 #include "regs.h"
 #include "hard-reg-set.h"
@@ -47,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "target-def.h"
 #include "common/common-target.h"
 #include "gimple.h"
+#include "gimplify.h"
 #include "langhooks.h"
 #include "reload.h"
 #include "params.h"
@@ -6331,7 +6336,7 @@ function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
	  if (integer_zerop (DECL_SIZE (field)))
	    continue;
 
-	  if (host_integerp (bit_position (field), 1))
+	  if (tree_fits_uhwi_p (bit_position (field)))
	    bitpos += int_bit_position (field);
	}
 
@@ -6479,7 +6484,7 @@ function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
	  if (integer_zerop (DECL_SIZE (field)))
	    continue;
 
-	  if (host_integerp (bit_position (field), 1))
+	  if (tree_fits_uhwi_p (bit_position (field)))
	    bitpos += int_bit_position (field);
	}
 
@@ -10653,7 +10658,8 @@ sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
	      tmp = e0.add_with_sign (tmp, false, &add1_ovf);
	      if (tmp.is_negative ())
		tmp = tmp.neg_with_overflow (&neg2_ovf);
-
+	      else
+		neg2_ovf = false;
	      result = result.add_with_sign (tmp, false, &add2_ovf);
	      overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
	    }
 
diff --git a/gcc/config/spu/spu-c.c b/gcc/config/spu/spu-c.c
index 215962fa7c9..7632ec1462c 100644
--- a/gcc/config/spu/spu-c.c
+++ b/gcc/config/spu/spu-c.c
@@ -20,6 +20,7 @@
 #include "tm.h"
 #include "cpplib.h"
 #include "tree.h"
+#include "stringpool.h"
 #include "c-family/c-common.h"
 #include "c-family/c-pragma.h"
 #include "tm_p.h"
 
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 38c441d9a03..315f1b3617f 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -28,6 +28,10 @@
 #include "recog.h"
 #include "obstack.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "varasm.h"
 #include "expr.h"
 #include "optabs.h"
 #include "except.h"
@@ -46,6 +50,7 @@
 #include "params.h"
 #include "machmode.h"
 #include "gimple.h"
+#include "gimplify.h"
 #include "tm-constrs.h"
 #include "ddg.h"
 #include "sbitmap.h"
@@ -2469,13 +2474,13 @@ spu_machine_dependent_reorg (void)
       compact_blocks ();
 
       spu_bb_info =
-	(struct spu_bb_info *) xcalloc (n_basic_blocks,
+	(struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
					sizeof (struct spu_bb_info));
 
       /* We need exact insn addresses and lengths.  */
       shorten_branches (get_insns ());
 
-      for (i = n_basic_blocks - 1; i >= 0; i--)
+      for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
	{
	  bb = BASIC_BLOCK (i);
	  branch = 0;
 
diff --git a/gcc/config/stormy16/stormy16.c b/gcc/config/stormy16/stormy16.c
index 30d6d781576..d5a1fc6ad86 100644
--- a/gcc/config/stormy16/stormy16.c
+++ b/gcc/config/stormy16/stormy16.c
@@ -35,6 +35,10 @@
 #include "diagnostic-core.h"
 #include "obstack.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
 #include "expr.h"
 #include "optabs.h"
 #include "except.h"
@@ -44,6 +48,7 @@
 #include "tm_p.h"
 #include "langhooks.h"
 #include "gimple.h"
+#include "gimplify.h"
 #include "df.h"
 #include "reload.h"
 #include "ggc.h"
 
diff --git a/gcc/config/tilegx/tilegx.c b/gcc/config/tilegx/tilegx.c
index dafa44c0674..809beefa305 100644
--- a/gcc/config/tilegx/tilegx.c
+++ b/gcc/config/tilegx/tilegx.c
@@ -41,6 +41,11 @@
 #include "timevar.h"
 #include "tree.h"
 #include "gimple.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "gimplify.h"
 #include "cfgloop.h"
 #include "tilegx-builtins.h"
 #include "tilegx-multiply.h"
 
diff --git a/gcc/config/tilepro/tilepro.c b/gcc/config/tilepro/tilepro.c
index 5e3be831fb1..9fe1b104f57 100644
--- a/gcc/config/tilepro/tilepro.c
+++ b/gcc/config/tilepro/tilepro.c
@@ -42,6 +42,11 @@
 #include "timevar.h"
 #include "tree.h"
 #include "gimple.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "gimplify.h"
 #include "cfgloop.h"
 #include "tilepro-builtins.h"
 #include "tilepro-multiply.h"
 
diff --git a/gcc/config/v850/v850-c.c b/gcc/config/v850/v850-c.c
index 63ef368afc4..25158d50e09 100644
--- a/gcc/config/v850/v850-c.c
+++ b/gcc/config/v850/v850-c.c
@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm.h"
 #include "cpplib.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "c-family/c-pragma.h"
 #include "diagnostic-core.h"
 #include "ggc.h"
 
diff --git a/gcc/config/v850/v850.c b/gcc/config/v850/v850.c
index 006cff4bcdf..32fe73b1fa6 100644
--- a/gcc/config/v850/v850.c
+++ b/gcc/config/v850/v850.c
@@ -23,6 +23,10 @@
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
 #include "rtl.h"
 #include "regs.h"
 #include "hard-reg-set.h"
 
diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c
index 6b643d1af5d..7aac7cb0554 100644
--- a/gcc/config/vax/vax.c
+++ b/gcc/config/vax/vax.c
@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl.h"
 #include "df.h"
 #include "tree.h"
+#include "calls.h"
+#include "varasm.h"
 #include "regs.h"
 #include "hard-reg-set.h"
 #include "insn-config.h"
 
diff --git a/gcc/config/vms/vms.c b/gcc/config/vms/vms.c
index ba1e2a69798..3047cfde9fe 100644
--- a/gcc/config/vms/vms.c
+++ b/gcc/config/vms/vms.c
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tree.h"
+#include "stringpool.h"
 #include "vms-protos.h"
 #include "ggc.h"
 #include "target.h"
 
diff --git a/gcc/config/vxworks.c b/gcc/config/vxworks.c
index 2900d9785ea..2940ea11933 100644
--- a/gcc/config/vxworks.c
+++ b/gcc/config/vxworks.c
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
If not see #include "output.h" #include "tm.h" #include "tree.h" +#include "stringpool.h" /* Like default_named_section_asm_out_constructor, except that even constructors with DEFAULT_INIT_PRIORITY must go in a numbered diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c index 7faf7de9968..9e6bb23818f 100644 --- a/gcc/config/xtensa/xtensa.c +++ b/gcc/config/xtensa/xtensa.c @@ -34,6 +34,10 @@ along with GCC; see the file COPYING3. If not see #include "recog.h" #include "output.h" #include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" #include "expr.h" #include "flags.h" #include "reload.h" @@ -47,6 +51,7 @@ along with GCC; see the file COPYING3. If not see #include "target-def.h" #include "langhooks.h" #include "gimple.h" +#include "gimplify.h" #include "df.h" |