diff options
Diffstat (limited to 'gcc/config')
160 files changed, 7734 insertions, 4283 deletions
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 58e815471a6..371e74c7f94 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -35,3 +35,4 @@ AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO) AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 55dead6e404..4046d7a7001 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -40,10 +40,6 @@ 10 - CODE_FOR_<name><mode>. */ BUILTIN_VD_RE (CREATE, create, 0) - BUILTIN_VQ_S (GETLANE, get_lane_signed, 0) - BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0) - BUILTIN_VDQF (GETLANE, get_lane, 0) - VAR1 (GETLANE, get_lane, 0, di) BUILTIN_VDC (COMBINE, combine, 0) BUILTIN_VB (BINOP, pmul, 0) BUILTIN_VDQF (UNOP, sqrt, 2) @@ -51,6 +47,9 @@ VAR1 (UNOP, addp, 0, di) VAR1 (UNOP, clz, 2, v4si) + BUILTIN_VALL (GETLANE, get_lane, 0) + VAR1 (GETLANE, get_lane, 0, di) + BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) @@ -64,7 +63,6 @@ BUILTIN_VQ (REINTERP, reinterpretv2df, 0) BUILTIN_VDQ_I (BINOP, dup_lane, 0) - BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0) /* Implemented by aarch64_<sur>q<r>shl<mode>. */ BUILTIN_VSDQ_I (BINOP, sqshl, 0) BUILTIN_VSDQ_I (BINOP, uqshl, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 178efdc964e..9805197a22b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -336,46 +336,47 @@ }) (define_insn "aarch64_simd_dup<mode>" - [(set (match_operand:VDQ 0 "register_operand" "=w") - (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r")))] + [(set (match_operand:VDQ 0 "register_operand" "=w, w") + (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))] "TARGET_SIMD" - "dup\\t%0.<Vtype>, %<vw>1" - [(set_attr "simd_type" "simd_dupgp") + "@ + dup\\t%0.<Vtype>, %<vw>1 + dup\\t%0.<Vtype>, %1.<Vetype>[0]" + [(set_attr "simd_type" "simd_dupgp, simd_dup") (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_dup_lane<mode>" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") - (vec_duplicate:VDQ_I - (vec_select:<VEL> - (match_operand:<VCON> 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")]) - )))] +(define_insn "aarch64_simd_dup<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))] "TARGET_SIMD" - "dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]" + "dup\\t%0.<Vtype>, %1.<Vetype>[0]" [(set_attr "simd_type" "simd_dup") (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_dup_lane_scalar<mode>" - [(set (match_operand:<VEL> 0 "register_operand" "=w, r") - (vec_select:<VEL> - (match_operand:VDQ 1 "register_operand" "w, w") - (parallel [(match_operand:SI 2 "immediate_operand" "i, i")]) - ))] +(define_insn "aarch64_dup_lane<mode>" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select:<VEL> + (match_operand:VALL 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] 
"TARGET_SIMD" - "@ - dup\\t%<Vetype>0, %1.<Vetype>[%2] - umov\\t%<vw>0, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_dup, simd_movgp") + "dup\\t%0.<Vtype>, %1.<Vetype>[%2]" + [(set_attr "simd_type" "simd_dup") (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_simd_dup<mode>" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))] +(define_insn "aarch64_dup_lane_<vswap_width_name><mode>" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select:<VEL> + (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] "TARGET_SIMD" - "dup\\t%0.<Vtype>, %1.<Vetype>[0]" + "dup\\t%0.<Vtype>, %1.<Vetype>[%2]" [(set_attr "simd_type" "simd_dup") (set_attr "simd_mode" "<MODE>")] ) @@ -1051,6 +1052,7 @@ fmov\\t%d0, %1 dup\\t%d0, %1" [(set_attr "v8type" "*,fmov,*") + (set_attr "type" "*,fmov,*") (set_attr "simd_type" "simd_dup,*,simd_dup") (set_attr "simd_mode" "<MODE>") (set_attr "simd" "yes,*,yes") @@ -2146,45 +2148,50 @@ DONE; }) -(define_insn "aarch64_get_lane_signed<mode>" - [(set (match_operand:<VEL> 0 "register_operand" "=r") - (sign_extend:<VEL> +;; Lane extraction with sign extension to general purpose register. +(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI (vec_select:<VEL> - (match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" - "smov\\t%0, %1.<Vetype>[%2]" + "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]" [(set_attr "simd_type" "simd_movgp") - (set_attr "simd_mode" "<MODE>")] + (set_attr "simd_mode" "<VDQQH:MODE>")] ) -(define_insn "aarch64_get_lane_unsigned<mode>" - [(set (match_operand:<VEL> 0 "register_operand" "=r") - (zero_extend:<VEL> +(define_insn "*aarch64_get_lane_zero_extendsi<mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:<VEL> - (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" - "umov\\t%<vw>0, %1.<Vetype>[%2]" + "umov\\t%w0, %1.<Vetype>[%2]" [(set_attr "simd_type" "simd_movgp") (set_attr "simd_mode" "<MODE>")] ) +;; Lane extraction of a value, neither sign nor zero extension +;; is guaranteed so upper bits should be considered undefined. 
(define_insn "aarch64_get_lane<mode>" - [(set (match_operand:<VEL> 0 "register_operand" "=w") + [(set (match_operand:<VEL> 0 "register_operand" "=r, w") (vec_select:<VEL> - (match_operand:VDQF 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:VALL 1 "register_operand" "w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])))] "TARGET_SIMD" - "mov\\t%0.<Vetype>[0], %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_ins") + "@ + umov\\t%<vwcore>0, %1.<Vetype>[%2] + dup\\t%<Vetype>0, %1.<Vetype>[%2]" + [(set_attr "simd_type" "simd_movgp, simd_dup") (set_attr "simd_mode" "<MODE>")] ) (define_expand "aarch64_get_lanedi" - [(match_operand:DI 0 "register_operand" "=r") - (match_operand:DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[2], 0, 1); @@ -2790,7 +2797,7 @@ (match_operand:VD_HSI 2 "register_operand" "w")) (sign_extend:<VWIDE> (vec_duplicate:VD_HSI - (match_operand:<VEL> 3 "register_operand" "w")))) + (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" @@ -2948,7 +2955,7 @@ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) (sign_extend:<VWIDE> (vec_duplicate:<VHALF> - (match_operand:<VEL> 3 "register_operand" "w")))) + (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1))))] "TARGET_SIMD" "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" @@ -3076,7 +3083,7 @@ (match_operand:VD_HSI 1 "register_operand" "w")) (sign_extend:<VWIDE> (vec_duplicate:VD_HSI - (match_operand:<VEL> 2 "register_operand" "w"))) + (match_operand:<VEL> 2 "register_operand" "<vwx>"))) ) (const_int 1)))] "TARGET_SIMD" @@ -3186,7 +3193,7 @@ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) (sign_extend:<VWIDE> (vec_duplicate:<VHALF> - (match_operand:<VEL> 2 "register_operand" "w"))) + (match_operand:<VEL> 2 "register_operand" "<vwx>"))) ) (const_int 1)))] "TARGET_SIMD" @@ -4172,13 +4179,23 @@ (set_attr "simd_mode" "<MODE>")] ) +(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRECP))] + "TARGET_SIMD" + "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" + [(set_attr "simd_type" "simd_frecp<FRECP:frecp_suffix>") + (set_attr "mode" "<MODE>")] +) + (define_insn "aarch64_frecps<mode>" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") - (match_operand:VDQF 2 "register_operand" "w")] + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] UNSPEC_FRECPS))] "TARGET_SIMD" - "frecps\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" [(set_attr "simd_type" "simd_frecps") (set_attr "simd_mode" "<MODE>")] ) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index aed035a434e..7635e1e2679 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -7932,6 +7932,55 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) } static bool +aarch64_evpc_dup (struct expand_vec_perm_d *d) +{ + rtx (*gen) (rtx, rtx, rtx); + rtx out = d->target; + rtx in0; + 
enum machine_mode vmode = d->vmode; + unsigned int i, elt, nelt = d->nelt; + rtx lane; + + /* TODO: This may not be big-endian safe. */ + if (BYTES_BIG_ENDIAN) + return false; + + elt = d->perm[0]; + for (i = 1; i < nelt; i++) + { + if (elt != d->perm[i]) + return false; + } + + /* The generic preparation in aarch64_expand_vec_perm_const_1 + swaps the operand order and the permute indices if it finds + d->perm[0] to be in the second operand. Thus, we can always + use d->op0 and need not do any extra arithmetic to get the + correct lane number. */ + in0 = d->op0; + lane = GEN_INT (elt); + + switch (vmode) + { + case V16QImode: gen = gen_aarch64_dup_lanev16qi; break; + case V8QImode: gen = gen_aarch64_dup_lanev8qi; break; + case V8HImode: gen = gen_aarch64_dup_lanev8hi; break; + case V4HImode: gen = gen_aarch64_dup_lanev4hi; break; + case V4SImode: gen = gen_aarch64_dup_lanev4si; break; + case V2SImode: gen = gen_aarch64_dup_lanev2si; break; + case V2DImode: gen = gen_aarch64_dup_lanev2di; break; + case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break; + case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break; + case V2DFmode: gen = gen_aarch64_dup_lanev2df; break; + default: + return false; + } + + emit_insn (gen (out, in0, lane)); + return true; +} + +static bool aarch64_evpc_tbl (struct expand_vec_perm_d *d) { rtx rperm[MAX_VECT_LEN], sel; @@ -7988,6 +8037,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; else if (aarch64_evpc_trn (d)) return true; + else if (aarch64_evpc_dup (d)) + return true; return aarch64_evpc_tbl (d); } return false; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 092426973c6..d8012f88049 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -158,6 +158,7 @@ #define AARCH64_FL_FP (1 << 1) /* Has FP. */ #define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ #define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */ +#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */ /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -170,6 +171,7 @@ /* Macros to test ISA flags. */ extern unsigned long aarch64_isa_flags; +#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 5d64228351b..f37f98f9994 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -88,11 +88,16 @@ UNSPEC_NOP UNSPEC_PRLG_STK UNSPEC_RBIT + UNSPEC_SISD_NEG + UNSPEC_SISD_SSHL + UNSPEC_SISD_USHL + UNSPEC_SSHL_2S UNSPEC_ST2 UNSPEC_ST3 UNSPEC_ST4 UNSPEC_TLS UNSPEC_TLSDESC + UNSPEC_USHL_2S UNSPEC_VSTRUCTDUMMY ]) @@ -235,9 +240,6 @@ fmovf2i,\ fmovi2f,\ fmul,\ - frecpe,\ - frecps,\ - frecpx,\ frint,\ fsqrt,\ load_acq,\ @@ -272,48 +274,9 @@ udiv" (const_string "alu")) - -; The "type" attribute is used by the AArch32 backend. Below is a mapping -; from "v8type" to "type". 
- -(define_attr "type" - "alu,alu_shift,block,branch,call,f_2_r,f_cvt,f_flag,f_loads, - f_loadd,f_stored,f_stores,faddd,fadds,fcmpd,fcmps,fconstd,fconsts, - fcpys,fdivd,fdivs,ffarithd,ffariths,fmacd,fmacs,fmuld,fmuls,load_byte, - load1,load2,mult,r_2_f,store1,store2" - (cond [ - (eq_attr "v8type" "alu_shift,alus_shift,logic_shift,logics_shift") (const_string "alu_shift") - (eq_attr "v8type" "branch") (const_string "branch") - (eq_attr "v8type" "call") (const_string "call") - (eq_attr "v8type" "fmovf2i") (const_string "f_2_r") - (eq_attr "v8type" "fcvt,fcvtf2i,fcvti2f") (const_string "f_cvt") - (and (eq_attr "v8type" "fpsimd_load") (eq_attr "mode" "SF")) (const_string "f_loads") - (and (eq_attr "v8type" "fpsimd_load") (eq_attr "mode" "DF")) (const_string "f_loadd") - (and (eq_attr "v8type" "fpsimd_store") (eq_attr "mode" "SF")) (const_string "f_stores") - (and (eq_attr "v8type" "fpsimd_store") (eq_attr "mode" "DF")) (const_string "f_stored") - (and (eq_attr "v8type" "fadd,fminmax") (eq_attr "mode" "DF")) (const_string "faddd") - (and (eq_attr "v8type" "fadd,fminmax") (eq_attr "mode" "SF")) (const_string "fadds") - (and (eq_attr "v8type" "fcmp,fccmp") (eq_attr "mode" "DF")) (const_string "fcmpd") - (and (eq_attr "v8type" "fcmp,fccmp") (eq_attr "mode" "SF")) (const_string "fcmps") - (and (eq_attr "v8type" "fconst") (eq_attr "mode" "DF")) (const_string "fconstd") - (and (eq_attr "v8type" "fconst") (eq_attr "mode" "SF")) (const_string "fconsts") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "DF")) (const_string "fdivd") - (and (eq_attr "v8type" "fdiv,fsqrt") (eq_attr "mode" "SF")) (const_string "fdivs") - (and (eq_attr "v8type" "ffarith") (eq_attr "mode" "DF")) (const_string "ffarithd") - (and (eq_attr "v8type" "ffarith") (eq_attr "mode" "SF")) (const_string "ffariths") - (and (eq_attr "v8type" "fmadd") (eq_attr "mode" "DF")) (const_string "fmacd") - (and (eq_attr "v8type" "fmadd") (eq_attr "mode" "SF")) (const_string "fmacs") - (and (eq_attr "v8type" "fmul") (eq_attr "mode" "DF")) (const_string "fmuld") - (and (eq_attr "v8type" "fmul") (eq_attr "mode" "SF")) (const_string "fmuls") - (and (eq_attr "v8type" "load1") (eq_attr "mode" "QI,HI")) (const_string "load_byte") - (and (eq_attr "v8type" "load1") (eq_attr "mode" "SI,DI,TI")) (const_string "load1") - (eq_attr "v8type" "load2") (const_string "load2") - (and (eq_attr "v8type" "mulh,mult,mull,madd,sdiv,udiv") (eq_attr "mode" "SI")) (const_string "mult") - (eq_attr "v8type" "fmovi2f") (const_string "r_2_f") - (eq_attr "v8type" "store1") (const_string "store1") - (eq_attr "v8type" "store2") (const_string "store2") - ] - (const_string "alu"))) +; The "type" attribute is is included here from AArch32 backend to be able +; to share pipeline descriptions. +(include "../arm/types.md") ;; Attribute that specifies whether or not the instruction touches fp ;; registers. 
@@ -349,6 +312,7 @@ (include "aarch64-generic.md") (include "large.md") (include "small.md") +(include "../arm/cortex-a53.md") ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns @@ -358,14 +322,16 @@ [(set (pc) (match_operand:DI 0 "register_operand" "r"))] "" "br\\t%0" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_insn "jump" [(set (pc) (label_ref (match_operand 0 "" "")))] "" "b\\t%l0" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_expand "cbranch<mode>4" @@ -403,7 +369,8 @@ (pc)))] "" "b%m0\\t%l2" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_expand "casesi" @@ -467,7 +434,8 @@ return aarch64_output_casesi (operands); " [(set_attr "length" "16") - (set_attr "v8type" "branch")] + (set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_insn "nop" @@ -508,7 +476,8 @@ [(return)] "" "ret" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) (define_insn "eh_return" @@ -516,7 +485,9 @@ UNSPECV_EH_RETURN)] "" "#" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] + ) (define_split @@ -536,7 +507,9 @@ (pc)))] "" "<cbz>\\t%<w>0, %l1" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] + ) (define_insn "*tb<optab><mode>1" @@ -555,6 +528,7 @@ return \"<tbz>\\t%<w>0, %1, %l2\"; " [(set_attr "v8type" "branch") + (set_attr "type" "branch") (set_attr "mode" "<MODE>") (set (attr "length") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) @@ -576,6 +550,7 @@ return \"<tbz>\\t%<w>0, <sizem1>, %l1\"; " [(set_attr "v8type" "branch") + (set_attr "type" "branch") (set_attr "mode" "<MODE>") (set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) @@ -620,7 +595,8 @@ (clobber (reg:DI LR_REGNUM))] "" "blr\\t%0" - [(set_attr "v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] ) (define_insn "*call_symbol" @@ -631,7 +607,8 @@ "GET_CODE (operands[0]) == SYMBOL_REF && !aarch64_is_long_call_p (operands[0])" "bl\\t%a0" - [(set_attr "v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] ) (define_expand "call_value" @@ -668,7 +645,9 @@ (clobber (reg:DI LR_REGNUM))] "" "blr\\t%1" - [(set_attr "v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] + ) (define_insn "*call_value_symbol" @@ -680,7 +659,8 @@ "GET_CODE (operands[1]) == SYMBOL_REF && !aarch64_is_long_call_p (operands[1])" "bl\\t%a1" - [(set_attr "v8type" "call")] + [(set_attr "v8type" "call") + (set_attr "type" "call")] ) (define_expand "sibcall" @@ -715,7 +695,9 @@ (use (match_operand 2 "" ""))] "GET_CODE (operands[0]) == SYMBOL_REF" "b\\t%a0" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] + ) (define_insn "*sibcall_value_insn" @@ -726,7 +708,8 @@ (use (match_operand 3 "" ""))] "GET_CODE (operands[1]) == SYMBOL_REF" "b\\t%a1" - [(set_attr "v8type" "branch")] + [(set_attr "v8type" "branch") + (set_attr "type" "branch")] ) ;; Call subroutine returning any type. 
@@ -804,6 +787,7 @@ } } [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*") + (set_attr "type" "mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,*,*,*") (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup") (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes") (set_attr "mode" "<MODE>") @@ -846,6 +830,8 @@ fmov\\t%w0, %s1 fmov\\t%s0, %s1" [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov") (set_attr "mode" "SI") (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] ) @@ -871,6 +857,8 @@ fmov\\t%d0, %d1 movi\\t%d0, %1" [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov,fmov") (set_attr "mode" "DI") (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] @@ -885,6 +873,7 @@ && UINTVAL (operands[1]) % 16 == 0" "movk\\t%<w>0, %X2, lsl %1" [(set_attr "v8type" "movk") + (set_attr "type" "mov_imm") (set_attr "mode" "<MODE>")] ) @@ -917,6 +906,8 @@ str\\t%q1, %0" [(set_attr "v8type" "move2,fmovi2f,fmovf2i,*, \ load2,store2,store2,fpsimd_load,fpsimd_store") + (set_attr "type" "multiple,f_mcr,f_mrc,*, \ + load2,store2,store2,f_loadd,f_stored") (set_attr "simd_type" "*,*,*,simd_move,*,*,*,*,*") (set_attr "mode" "DI,DI,DI,TI,DI,DI,DI,TI,TI") (set_attr "length" "8,8,8,4,4,4,4,4,4") @@ -970,6 +961,8 @@ [(set_attr "v8type" "fmovi2f,fmovf2i,\ fmov,fconst,fpsimd_load,\ fpsimd_store,fpsimd_load,fpsimd_store,fmov") + (set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ + f_loads,f_stores,f_loads,f_stores,fmov") (set_attr "mode" "SF")] ) @@ -991,6 +984,8 @@ [(set_attr "v8type" "fmovi2f,fmovf2i,\ fmov,fconst,fpsimd_load,\ fpsimd_store,fpsimd_load,fpsimd_store,move") + (set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\ + f_loadd,f_stored,f_loadd,f_stored,mov_reg") (set_attr "mode" "DF")] ) @@ -1029,6 +1024,8 @@ ldp\\t%0, %H0, %1 stp\\t%1, %H1, %0" [(set_attr "v8type" "logic,move2,fmovi2f,fmovf2i,fconst,fconst,fpsimd_load,fpsimd_store,fpsimd_load2,fpsimd_store2") + (set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\ + f_loadd,f_stored,neon_ldm_2,neon_stm_2") (set_attr "mode" "DF,DF,DF,DF,DF,DF,TF,TF,DF,DF") (set_attr "length" "4,8,8,8,4,4,4,4,4,4") (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*") @@ -1059,6 +1056,7 @@ GET_MODE_SIZE (<MODE>mode)))" "ldp\\t%<w>0, %<w>2, %1" [(set_attr "v8type" "load2") + (set_attr "type" "load2") (set_attr "mode" "<MODE>")] ) @@ -1075,6 +1073,7 @@ GET_MODE_SIZE (<MODE>mode)))" "stp\\t%<w>1, %<w>3, %0" [(set_attr "v8type" "store2") + (set_attr "type" "store2") (set_attr "mode" "<MODE>")] ) @@ -1091,6 +1090,7 @@ GET_MODE_SIZE (<MODE>mode)))" "ldp\\t%<w>0, %<w>2, %1" [(set_attr "v8type" "fpsimd_load2") + (set_attr "type" "neon_ldm_2") (set_attr "mode" "<MODE>")] ) @@ -1106,7 +1106,8 @@ XEXP (operands[0], 0), GET_MODE_SIZE (<MODE>mode)))" "stp\\t%<w>1, %<w>3, %0" - [(set_attr "v8type" "fpsimd_load2") + [(set_attr "v8type" "fpsimd_store2") + (set_attr "type" "neon_stm_2") (set_attr "mode" "<MODE>")] ) @@ -1126,6 +1127,7 @@ "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)" "ldp\\t%<w>2, %<w>3, [%1], %4" [(set_attr "v8type" "load2") + (set_attr "type" "load2") (set_attr "mode" "<GPI:MODE>")] ) @@ -1145,6 +1147,7 @@ "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE 
(<GPI:MODE>mode)" "stp\\t%<w>2, %<w>3, [%0, %4]!" [(set_attr "v8type" "store2") + (set_attr "type" "store2") (set_attr "mode" "<GPI:MODE>")] ) @@ -1166,6 +1169,7 @@ sxtw\t%0, %w1 ldrsw\t%0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "DI")] ) @@ -1177,6 +1181,7 @@ uxtw\t%0, %w1 ldr\t%w0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "DI")] ) @@ -1194,6 +1199,7 @@ sxt<SHORT:size>\t%<GPI:w>0, %w1 ldrs<SHORT:size>\t%<GPI:w>0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "<GPI:MODE>")] ) @@ -1206,6 +1212,7 @@ ldr<SHORT:size>\t%w0, %1 ldr\t%<SHORT:size>0, %1" [(set_attr "v8type" "extend,load1,load1") + (set_attr "type" "extend,load1,load1") (set_attr "mode" "<GPI:MODE>")] ) @@ -1223,6 +1230,7 @@ <su>xtb\t%w0, %w1 <ldrxt>b\t%w0, %1" [(set_attr "v8type" "extend,load1") + (set_attr "type" "extend,load1") (set_attr "mode" "HI")] ) @@ -1267,6 +1275,7 @@ add\\t%w0, %w1, %w2 sub\\t%w0, %w1, #%n2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_imm,alu_reg,alu_imm") (set_attr "mode" "SI")] ) @@ -1283,6 +1292,7 @@ add\\t%w0, %w1, %w2 sub\\t%w0, %w1, #%n2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_imm,alu_reg,alu_imm") (set_attr "mode" "SI")] ) @@ -1299,6 +1309,7 @@ sub\\t%x0, %x1, #%n2 add\\t%d0, %d1, %d2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg") (set_attr "mode" "DI") (set_attr "simd" "*,*,*,yes")] ) @@ -1306,16 +1317,18 @@ (define_insn "*add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r") - (match_operand:GPI 2 "aarch64_plus_operand" "rI,J")) + (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") + (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) - (set (match_operand:GPI 0 "register_operand" "=r,r") + (set (match_operand:GPI 0 "register_operand" "=r,r,r") (plus:GPI (match_dup 1) (match_dup 2)))] "" "@ adds\\t%<w>0, %<w>1, %<w>2 + adds\\t%<w>0, %<w>1, %<w>2 subs\\t%<w>0, %<w>1, #%n2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr "mode" "<MODE>")] ) @@ -1323,16 +1336,18 @@ (define_insn "*addsi3_compare0_uxtw" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:SI (match_operand:SI 1 "register_operand" "%r,r") - (match_operand:SI 2 "aarch64_plus_operand" "rI,J")) + (plus:SI (match_operand:SI 1 "register_operand" "%r,r,r") + (match_operand:SI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r,r") + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "" "@ adds\\t%w0, %w1, %w2 + adds\\t%w0, %w1, %w2 subs\\t%w0, %w1, #%n2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr "mode" "SI")] ) @@ -1350,6 +1365,7 @@ "" "adds\\t%<w>0, %<w>3, %<w>1, lsl %p2" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -1367,6 +1383,7 @@ "" "subs\\t%<w>0, %<w>1, %<w>2, lsl %p3" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -1382,6 +1399,7 @@ "" "adds\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -1397,6 +1415,7 @@ "" "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") 
(set_attr "mode" "<GPI:MODE>")] ) @@ -1418,6 +1437,7 @@ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])" "adds\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "<MODE>")] ) @@ -1439,20 +1459,23 @@ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])" "subs\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "<MODE>")] ) (define_insn "*add<mode>3nr_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (match_operand:GPI 0 "register_operand" "%r,r") - (match_operand:GPI 1 "aarch64_plus_operand" "rI,J")) + (plus:GPI (match_operand:GPI 0 "register_operand" "%r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")) (const_int 0)))] "" "@ cmn\\t%<w>0, %<w>1 + cmn\\t%<w>0, %<w>1 cmp\\t%<w>0, #%n1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr "mode" "<MODE>")] ) @@ -1464,6 +1487,7 @@ "" "cmn\\t%<w>0, %<w>1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "<MODE>")] ) @@ -1475,6 +1499,7 @@ "" "add\\t%<w>0, %<w>3, %<w>1, <shift> %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -1488,6 +1513,7 @@ "" "add\\t%w0, %w3, %w1, <shift> %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -1499,6 +1525,7 @@ "" "add\\t%<w>0, %<w>3, %<w>1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -1509,6 +1536,7 @@ "" "add\\t%<GPI:w>0, %<GPI:w>2, %<GPI:w>1, <su>xt<ALLX:size>" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -1521,6 +1549,7 @@ "" "add\\t%w0, %w2, %w1, <su>xt<SHORT:size>" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1533,6 +1562,7 @@ "" "add\\t%<GPI:w>0, %<GPI:w>3, %<GPI:w>1, <su>xt<ALLX:size> %2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -1547,6 +1577,7 @@ "" "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1559,6 +1590,7 @@ "" "add\\t%<GPI:w>0, %<GPI:w>3, %<GPI:w>1, <su>xt<ALLX:size> %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -1572,6 +1604,7 @@ "" "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1586,6 +1619,7 @@ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])" "add\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<MODE>")] ) @@ -1602,6 +1636,7 @@ "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" "add\\t%w0, %w4, %w1, <su>xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1615,6 +1650,7 @@ "" "adc\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "<MODE>")] ) @@ -1630,6 +1666,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1643,6 +1680,7 @@ "" "adc\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "<MODE>")] ) @@ -1658,6 +1696,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") 
+ (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1671,6 +1710,7 @@ "" "adc\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "<MODE>")] ) @@ -1686,6 +1726,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1699,6 +1740,7 @@ "" "adc\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "<MODE>")] ) @@ -1714,6 +1756,7 @@ "" "adc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1730,6 +1773,7 @@ INTVAL (operands[3]))); return \"add\t%<w>0, %<w>4, %<w>1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<MODE>")] ) @@ -1748,6 +1792,7 @@ INTVAL (operands[3]))); return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1758,6 +1803,7 @@ "" "sub\\t%w0, %w1, %w2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "SI")] ) @@ -1770,6 +1816,7 @@ "" "sub\\t%w0, %w1, %w2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "SI")] ) @@ -1782,6 +1829,7 @@ sub\\t%x0, %x1, %x2 sub\\t%d0, %d1, %d2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "DI") (set_attr "simd" "*,yes")] ) @@ -1797,6 +1845,7 @@ "" "subs\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "<MODE>")] ) @@ -1811,6 +1860,7 @@ "" "subs\\t%w0, %w1, %w2" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "SI")] ) @@ -1823,6 +1873,7 @@ "" "sub\\t%<w>0, %<w>3, %<w>1, <shift> %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -1837,6 +1888,7 @@ "" "sub\\t%w0, %w3, %w1, <shift> %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -1849,6 +1901,7 @@ "" "sub\\t%<w>0, %<w>3, %<w>1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -1863,6 +1916,7 @@ "" "sub\\t%w0, %w3, %w1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -1874,6 +1928,7 @@ "" "sub\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size>" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -1887,6 +1942,7 @@ "" "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size>" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1899,6 +1955,7 @@ "" "sub\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size> %3" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -1913,6 +1970,7 @@ "" "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size> %3" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1927,6 +1985,7 @@ "aarch64_is_extend_from_extract (<MODE>mode, operands[2], operands[3])" "sub\\t%<w>0, %<w>4, %<w>1, <su>xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<MODE>")] ) @@ -1943,6 +2002,7 @@ "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" "sub\\t%w0, %w4, %w1, <su>xt%e3 %p2" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -1956,6 +2016,7 @@ "" "sbc\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "<MODE>")] ) @@ 
-1971,6 +2032,7 @@ "" "sbc\\t%w0, %w1, %w2" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -1987,6 +2049,7 @@ INTVAL (operands[3]))); return \"sub\t%<w>0, %<w>4, %<w>1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "<MODE>")] ) @@ -2005,6 +2068,7 @@ INTVAL (operands[3]))); return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";" [(set_attr "v8type" "alu_ext") + (set_attr "type" "alu_ext") (set_attr "mode" "SI")] ) @@ -2037,6 +2101,7 @@ DONE; } [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "DI")] ) @@ -2048,6 +2113,7 @@ neg\\t%<w>0, %<w>1 neg\\t%<rtn>0<vas>, %<rtn>1<vas>" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "simd_type" "*,simd_negabs") (set_attr "simd" "*,yes") (set_attr "mode" "<MODE>") @@ -2061,6 +2127,7 @@ "" "neg\\t%w0, %w1" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "SI")] ) @@ -2071,6 +2138,7 @@ "" "ngc\\t%<w>0, %<w>1" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "<MODE>")] ) @@ -2082,6 +2150,7 @@ "" "ngc\\t%w0, %w1" [(set_attr "v8type" "adc") + (set_attr "type" "adc_reg") (set_attr "mode" "SI")] ) @@ -2094,6 +2163,7 @@ "" "negs\\t%<w>0, %<w>1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "<MODE>")] ) @@ -2107,6 +2177,7 @@ "" "negs\\t%w0, %w1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg") (set_attr "mode" "SI")] ) @@ -2122,6 +2193,7 @@ "" "negs\\t%<w>0, %<w>1, <shift> %2" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -2133,6 +2205,7 @@ "" "neg\\t%<w>0, %<w>1, <shift> %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -2146,6 +2219,7 @@ "" "neg\\t%w0, %w1, <shift> %2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -2157,6 +2231,7 @@ "" "neg\\t%<w>0, %<w>1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -2170,6 +2245,7 @@ "" "neg\\t%w0, %w1, lsl %p2" [(set_attr "v8type" "alu_shift") + (set_attr "type" "alu_shift_imm") (set_attr "mode" "SI")] ) @@ -2180,6 +2256,7 @@ "" "mul\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "mult") + (set_attr "type" "mul") (set_attr "mode" "<MODE>")] ) @@ -2192,6 +2269,7 @@ "" "mul\\t%w0, %w1, %w2" [(set_attr "v8type" "mult") + (set_attr "type" "mul") (set_attr "mode" "SI")] ) @@ -2203,6 +2281,7 @@ "" "madd\\t%<w>0, %<w>1, %<w>2, %<w>3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "<MODE>")] ) @@ -2216,6 +2295,7 @@ "" "madd\\t%w0, %w1, %w2, %w3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "SI")] ) @@ -2228,6 +2308,7 @@ "" "msub\\t%<w>0, %<w>1, %<w>2, %<w>3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "<MODE>")] ) @@ -2242,6 +2323,7 @@ "" "msub\\t%w0, %w1, %w2, %w3" [(set_attr "v8type" "madd") + (set_attr "type" "mla") (set_attr "mode" "SI")] ) @@ -2253,6 +2335,7 @@ "" "mneg\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "mult") + (set_attr "type" "mul") (set_attr "mode" "<MODE>")] ) @@ -2266,6 +2349,7 @@ "" "mneg\\t%w0, %w1, %w2" [(set_attr "v8type" "mult") + (set_attr "type" "mul") (set_attr "mode" "SI")] ) @@ -2276,6 +2360,7 @@ "" "<su>mull\\t%0, %w1, %w2" [(set_attr "v8type" "mull") + (set_attr "type" "<su>mull") (set_attr "mode" "DI")] ) @@ -2288,6 +2373,7 @@ "" "<su>maddl\\t%0, %w1, %w2, %3" [(set_attr "v8type" "maddl") + 
(set_attr "type" "<su>mlal") (set_attr "mode" "DI")] ) @@ -2301,6 +2387,7 @@ "" "<su>msubl\\t%0, %w1, %w2, %3" [(set_attr "v8type" "maddl") + (set_attr "type" "<su>mlal") (set_attr "mode" "DI")] ) @@ -2312,6 +2399,7 @@ "" "<su>mnegl\\t%0, %w1, %w2" [(set_attr "v8type" "mull") + (set_attr "type" "<su>mull") (set_attr "mode" "DI")] ) @@ -2326,6 +2414,7 @@ "" "<su>mulh\\t%0, %1, %2" [(set_attr "v8type" "mulh") + (set_attr "type" "<su>mull") (set_attr "mode" "DI")] ) @@ -2336,6 +2425,7 @@ "" "<su>div\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "<su>div") + (set_attr "type" "<su>div") (set_attr "mode" "<MODE>")] ) @@ -2348,6 +2438,7 @@ "" "<su>div\\t%w0, %w1, %w2" [(set_attr "v8type" "<su>div") + (set_attr "type" "<su>div") (set_attr "mode" "SI")] ) @@ -2357,13 +2448,15 @@ (define_insn "*cmp<mode>" [(set (reg:CC CC_REGNUM) - (compare:CC (match_operand:GPI 0 "register_operand" "r,r") - (match_operand:GPI 1 "aarch64_plus_operand" "rI,J")))] + (compare:CC (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))] "" "@ cmp\\t%<w>0, %<w>1 + cmp\\t%<w>0, %<w>1 cmn\\t%<w>0, #%n1" [(set_attr "v8type" "alus") + (set_attr "type" "alus_reg,alus_imm,alus_imm") (set_attr "mode" "<MODE>")] ) @@ -2376,6 +2469,7 @@ fcmp\\t%<s>0, #0.0 fcmp\\t%<s>0, %<s>1" [(set_attr "v8type" "fcmp") + (set_attr "type" "fcmp<s>") (set_attr "mode" "<MODE>")] ) @@ -2388,6 +2482,7 @@ fcmpe\\t%<s>0, #0.0 fcmpe\\t%<s>0, %<s>1" [(set_attr "v8type" "fcmp") + (set_attr "type" "fcmp<s>") (set_attr "mode" "<MODE>")] ) @@ -2400,6 +2495,7 @@ "" "cmp\\t%<w>2, %<w>0, <shift> %1" [(set_attr "v8type" "alus_shift") + (set_attr "type" "alus_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -2411,6 +2507,7 @@ "" "cmp\\t%<GPI:w>1, %<GPI:w>0, <su>xt<ALLX:size>" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -2424,6 +2521,7 @@ "" "cmp\\t%<GPI:w>2, %<GPI:w>0, <su>xt<ALLX:size> %1" [(set_attr "v8type" "alus_ext") + (set_attr "type" "alus_ext") (set_attr "mode" "<GPI:MODE>")] ) @@ -2464,6 +2562,7 @@ "" "cset\\t%<w>0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "<MODE>")] ) @@ -2476,6 +2575,7 @@ "" "cset\\t%w0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "SI")] ) @@ -2486,6 +2586,7 @@ "" "csetm\\t%<w>0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "<MODE>")] ) @@ -2498,6 +2599,7 @@ "" "csetm\\t%w0, %m1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "SI")] ) @@ -2552,6 +2654,7 @@ mov\\t%<w>0, -1 mov\\t%<w>0, 1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "<MODE>")] ) @@ -2576,6 +2679,7 @@ mov\\t%w0, -1 mov\\t%w0, 1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "SI")] ) @@ -2589,6 +2693,7 @@ "TARGET_FLOAT" "fcsel\\t%<s>0, %<s>3, %<s>4, %m1" [(set_attr "v8type" "fcsel") + (set_attr "type" "fcsel") (set_attr "mode" "<MODE>")] ) @@ -2638,6 +2743,7 @@ "" "csinc\\t%<w>0, %<w>1, %<w>1, %M2" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "<MODE>")]) (define_insn "csinc3<mode>_insn" @@ -2651,6 +2757,7 @@ "" "csinc\\t%<w>0, %<w>4, %<w>3, %M1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "<MODE>")] ) @@ -2664,6 +2771,7 @@ "" "csinv\\t%<w>0, %<w>4, %<w>3, %M1" [(set_attr "v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "<MODE>")]) (define_insn "*csneg3<mode>_insn" @@ -2676,6 +2784,7 @@ "" "csneg\\t%<w>0, %<w>4, %<w>3, %M1" [(set_attr 
"v8type" "csel") + (set_attr "type" "csel") (set_attr "mode" "<MODE>")]) ;; ------------------------------------------------------------------- @@ -2689,6 +2798,7 @@ "" "<logical>\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "logic,logic_imm") + (set_attr "type" "logic_reg,logic_imm") (set_attr "mode" "<MODE>")]) ;; zero_extend version of above @@ -2700,6 +2810,7 @@ "" "<logical>\\t%w0, %w1, %w2" [(set_attr "v8type" "logic,logic_imm") + (set_attr "type" "logic_reg,logic_imm") (set_attr "mode" "SI")]) (define_insn "*and<mode>3_compare0" @@ -2713,6 +2824,7 @@ "" "ands\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "logics,logics_imm") + (set_attr "type" "logics_reg,logics_imm") (set_attr "mode" "<MODE>")] ) @@ -2728,6 +2840,7 @@ "" "ands\\t%w0, %w1, %w2" [(set_attr "v8type" "logics,logics_imm") + (set_attr "type" "logics_reg,logics_imm") (set_attr "mode" "SI")] ) @@ -2744,6 +2857,7 @@ "" "ands\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "<MODE>")] ) @@ -2762,6 +2876,7 @@ "" "ands\\t%w0, %w3, %w1, <SHIFT:shift> %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "SI")] ) @@ -2774,6 +2889,7 @@ "" "<LOGICAL:logical>\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logic_shift_imm") (set_attr "mode" "<MODE>")]) ;; zero_extend version of above @@ -2787,6 +2903,7 @@ "" "<LOGICAL:logical>\\t%w0, %w3, %w1, <SHIFT:shift> %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logic_shift_imm") (set_attr "mode" "SI")]) (define_insn "one_cmpl<mode>2" @@ -2795,6 +2912,7 @@ "" "mvn\\t%<w>0, %<w>1" [(set_attr "v8type" "logic") + (set_attr "type" "logic_reg") (set_attr "mode" "<MODE>")]) (define_insn "*one_cmpl_<optab><mode>2" @@ -2804,6 +2922,7 @@ "" "mvn\\t%<w>0, %<w>1, <shift> %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logic_shift_imm") (set_attr "mode" "<MODE>")]) (define_insn "*<LOGICAL:optab>_one_cmpl<mode>3" @@ -2814,6 +2933,7 @@ "" "<LOGICAL:nlogical>\\t%<w>0, %<w>2, %<w>1" [(set_attr "v8type" "logic") + (set_attr "type" "logic_reg") (set_attr "mode" "<MODE>")]) (define_insn "*and_one_cmpl<mode>3_compare0" @@ -2828,6 +2948,7 @@ "" "bics\\t%<w>0, %<w>2, %<w>1" [(set_attr "v8type" "logics") + (set_attr "type" "logics_reg") (set_attr "mode" "<MODE>")]) ;; zero_extend version of above @@ -2843,6 +2964,7 @@ "" "bics\\t%w0, %w2, %w1" [(set_attr "v8type" "logics") + (set_attr "type" "logics_reg") (set_attr "mode" "SI")]) (define_insn "*<LOGICAL:optab>_one_cmpl_<SHIFT:optab><mode>3" @@ -2855,6 +2977,7 @@ "" "<LOGICAL:nlogical>\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2" [(set_attr "v8type" "logic_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "<MODE>")]) (define_insn "*and_one_cmpl_<SHIFT:optab><mode>3_compare0" @@ -2873,6 +2996,7 @@ "" "bics\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "<MODE>")]) ;; zero_extend version of above @@ -2892,6 +3016,7 @@ "" "bics\\t%w0, %w3, %w1, <SHIFT:shift> %2" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "SI")]) (define_insn "clz<mode>2" @@ -2900,6 +3025,7 @@ "" "clz\\t%<w>0, %<w>1" [(set_attr "v8type" "clz") + (set_attr "type" "clz") (set_attr "mode" "<MODE>")]) (define_expand "ffs<mode>2" @@ -2923,6 +3049,7 @@ "" "cls\\t%<w>0, %<w>1" [(set_attr "v8type" "clz") + (set_attr "type" "clz") (set_attr "mode" "<MODE>")]) (define_insn 
"rbit<mode>2" @@ -2931,6 +3058,7 @@ "" "rbit\\t%<w>0, %<w>1" [(set_attr "v8type" "rbit") + (set_attr "type" "rbit") (set_attr "mode" "<MODE>")]) (define_expand "ctz<mode>2" @@ -2953,6 +3081,7 @@ "" "tst\\t%<w>0, %<w>1" [(set_attr "v8type" "logics") + (set_attr "type" "logics_reg") (set_attr "mode" "<MODE>")]) (define_insn "*and_<SHIFT:optab><mode>3nr_compare0" @@ -2966,6 +3095,7 @@ "" "tst\\t%<w>2, %<w>0, <SHIFT:shift> %1" [(set_attr "v8type" "logics_shift") + (set_attr "type" "logics_shift_imm") (set_attr "mode" "<MODE>")]) ;; ------------------------------------------------------------------- @@ -3055,14 +3185,184 @@ } ) -(define_insn "*<optab><mode>3_insn" +;; Logical left shift using SISD or Integer instruction +(define_insn "*aarch64_ashl_sisd_or_int_<mode>3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashift:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))] + "" + "@ + shl\t%<rtn>0<vas>, %<rtn>1<vas>, %2 + ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas> + lsl\t%<w>0, %<w>1, %<w>2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" "<MODE>,<MODE>,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift_reg") + (set_attr "mode" "*,*,<MODE>")] +) + +;; Logical right shift using SISD or Integer instruction +(define_insn "*aarch64_lshr_sisd_or_int_<mode>3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (lshiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))] + "" + "@ + ushr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 + # + lsr\t%<w>0, %<w>1, %<w>2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" "<MODE>,<MODE>,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift_reg") + (set_attr "mode" "*,*,<MODE>")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (lshiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (lshiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] + "" +) + +;; Arithmetic right shift using SISD or Integer instruction +(define_insn "*aarch64_ashr_sisd_or_int_<mode>3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,rUs<cmode>")))] + "" + "@ + sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 + # + asr\t%<w>0, %<w>1, %<w>2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" "<MODE>,<MODE>,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift_reg") + (set_attr "mode" "*,*,<MODE>")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (ashiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 
"aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (ashiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] + "" +) + +(define_insn "*aarch64_sisd_ushl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_USHL))] + "TARGET_SIMD" + "ushl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_ushl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_USHL_2S))] + "TARGET_SIMD" + "ushl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sisd_sshl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_SSHL))] + "TARGET_SIMD" + "sshl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sshl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SSHL_2S))] + "TARGET_SIMD" + "sshl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sisd_neg_qi" + [(set (match_operand:QI 0 "register_operand" "=w") + (unspec:QI [(match_operand:QI 1 "register_operand" "w")] + UNSPEC_SISD_NEG))] + "TARGET_SIMD" + "neg\t%d0, %d1" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_negabs") + (set_attr "simd_mode" "QI")] +) + +;; Rotate right +(define_insn "*ror<mode>3_insn" [(set (match_operand:GPI 0 "register_operand" "=r") - (SHIFT:GPI - (match_operand:GPI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>")))] + (rotatert:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>")))] "" - "<shift>\\t%<w>0, %<w>1, %<w>2" + "ror\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "shift") + (set_attr "type" "shift_reg") (set_attr "mode" "<MODE>")] ) @@ -3075,6 +3375,7 @@ "" "<shift>\\t%w0, %w1, %w2" [(set_attr "v8type" "shift") + (set_attr "type" "shift_reg") (set_attr "mode" "SI")] ) @@ -3085,6 +3386,7 @@ "" "lsl\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "shift") + (set_attr "type" "shift_reg") (set_attr "mode" "<MODE>")] ) @@ -3098,6 +3400,7 @@ return "<bfshift>\t%w0, %w1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<MODE>")] ) @@ -3111,6 +3414,7 @@ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))" "extr\\t%<w>0, %<w>1, %<w>2, %4" [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "<MODE>")] ) @@ -3126,6 +3430,7 @@ (UINTVAL 
(operands[3]) + UINTVAL (operands[4]) == 32)" "extr\\t%w0, %w1, %w2, %4" [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "SI")] ) @@ -3139,6 +3444,7 @@ return "ror\\t%<w>0, %<w>1, %3"; } [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "<MODE>")] ) @@ -3154,6 +3460,7 @@ return "ror\\t%w0, %w1, %3"; } [(set_attr "v8type" "shift") + (set_attr "type" "shift_imm") (set_attr "mode" "SI")] ) @@ -3168,6 +3475,7 @@ return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<GPI:MODE>")] ) @@ -3182,6 +3490,7 @@ return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<GPI:MODE>")] ) @@ -3196,6 +3505,7 @@ return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<GPI:MODE>")] ) @@ -3220,6 +3530,7 @@ "" "<su>bfx\\t%<w>0, %<w>1, %3, %2" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<MODE>")] ) @@ -3264,6 +3575,7 @@ > GET_MODE_BITSIZE (<MODE>mode)))" "bfi\\t%<w>0, %<w>3, %2, %1" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<MODE>")] ) @@ -3279,6 +3591,7 @@ > GET_MODE_BITSIZE (<MODE>mode)))" "bfxil\\t%<w>0, %<w>2, %3, %1" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<MODE>")] ) @@ -3295,6 +3608,7 @@ return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3"; } [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<GPI:MODE>")] ) @@ -3309,6 +3623,7 @@ && (INTVAL (operands[3]) & ((1 << INTVAL (operands[2])) - 1)) == 0" "ubfiz\\t%<w>0, %<w>1, %2, %P3" [(set_attr "v8type" "bfm") + (set_attr "type" "bfm") (set_attr "mode" "<MODE>")] ) @@ -3318,6 +3633,7 @@ "" "rev\\t%<w>0, %<w>1" [(set_attr "v8type" "rev") + (set_attr "type" "rev") (set_attr "mode" "<MODE>")] ) @@ -3327,6 +3643,7 @@ "" "rev16\\t%w0, %w1" [(set_attr "v8type" "rev") + (set_attr "type" "rev") (set_attr "mode" "HI")] ) @@ -3337,6 +3654,7 @@ "" "rev\\t%w0, %w1" [(set_attr "v8type" "rev") + (set_attr "type" "rev") (set_attr "mode" "SI")] ) @@ -3354,6 +3672,7 @@ "TARGET_FLOAT" "frint<frint_suffix>\\t%<s>0, %<s>1" [(set_attr "v8type" "frint") + (set_attr "type" "f_rint<s>") (set_attr "mode" "<MODE>")] ) @@ -3366,6 +3685,7 @@ "TARGET_FLOAT" "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1" [(set_attr "v8type" "fcvtf2i") + (set_attr "type" "f_cvtf2i") (set_attr "mode" "<GPF:MODE>") (set_attr "mode2" "<GPI:MODE>")] ) @@ -3380,6 +3700,7 @@ "TARGET_FLOAT" "fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac<s>") (set_attr "mode" "<MODE>")] ) @@ -3391,6 +3712,7 @@ "TARGET_FLOAT" "fmsub\\t%<s>0, %<s>1, %<s>2, %<s>3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac<s>") (set_attr "mode" "<MODE>")] ) @@ -3402,6 +3724,7 @@ "TARGET_FLOAT" "fnmsub\\t%<s>0, %<s>1, %<s>2, %<s>3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac<s>") (set_attr "mode" "<MODE>")] ) @@ -3413,6 +3736,7 @@ "TARGET_FLOAT" "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac<s>") (set_attr "mode" "<MODE>")] ) @@ -3425,6 +3749,7 @@ "!HONOR_SIGNED_ZEROS (<MODE>mode) && TARGET_FLOAT" "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" [(set_attr "v8type" "fmadd") + (set_attr "type" "fmac<s>") (set_attr "mode" "<MODE>")] ) @@ -3438,6 +3763,7 @@ "TARGET_FLOAT" "fcvt\\t%d0, %s1" [(set_attr "v8type" "fcvt") + (set_attr "type" "f_cvt") (set_attr "mode" "DF") (set_attr "mode2" "SF")] ) @@ -3448,6 +3774,7 @@ 
"TARGET_FLOAT" "fcvt\\t%s0, %d1" [(set_attr "v8type" "fcvt") + (set_attr "type" "f_cvt") (set_attr "mode" "SF") (set_attr "mode2" "DF")] ) @@ -3458,6 +3785,7 @@ "TARGET_FLOAT" "fcvtzs\\t%<GPI:w>0, %<GPF:s>1" [(set_attr "v8type" "fcvtf2i") + (set_attr "type" "f_cvtf2i") (set_attr "mode" "<GPF:MODE>") (set_attr "mode2" "<GPI:MODE>")] ) @@ -3468,6 +3796,7 @@ "TARGET_FLOAT" "fcvtzu\\t%<GPI:w>0, %<GPF:s>1" [(set_attr "v8type" "fcvtf2i") + (set_attr "type" "f_cvtf2i") (set_attr "mode" "<GPF:MODE>") (set_attr "mode2" "<GPI:MODE>")] ) @@ -3478,6 +3807,7 @@ "TARGET_FLOAT" "scvtf\\t%<GPF:s>0, %<GPI:w>1" [(set_attr "v8type" "fcvti2f") + (set_attr "type" "f_cvti2f") (set_attr "mode" "<GPF:MODE>") (set_attr "mode2" "<GPI:MODE>")] ) @@ -3488,6 +3818,7 @@ "TARGET_FLOAT" "ucvtf\\t%<GPF:s>0, %<GPI:w>1" [(set_attr "v8type" "fcvt") + (set_attr "type" "f_cvt") (set_attr "mode" "<GPF:MODE>") (set_attr "mode2" "<GPI:MODE>")] ) @@ -3504,6 +3835,7 @@ "TARGET_FLOAT" "fadd\\t%<s>0, %<s>1, %<s>2" [(set_attr "v8type" "fadd") + (set_attr "type" "fadd<s>") (set_attr "mode" "<MODE>")] ) @@ -3515,6 +3847,7 @@ "TARGET_FLOAT" "fsub\\t%<s>0, %<s>1, %<s>2" [(set_attr "v8type" "fadd") + (set_attr "type" "fadd<s>") (set_attr "mode" "<MODE>")] ) @@ -3526,6 +3859,7 @@ "TARGET_FLOAT" "fmul\\t%<s>0, %<s>1, %<s>2" [(set_attr "v8type" "fmul") + (set_attr "type" "fmul<s>") (set_attr "mode" "<MODE>")] ) @@ -3537,6 +3871,7 @@ "TARGET_FLOAT" "fnmul\\t%<s>0, %<s>1, %<s>2" [(set_attr "v8type" "fmul") + (set_attr "type" "fmul<s>") (set_attr "mode" "<MODE>")] ) @@ -3548,6 +3883,7 @@ "TARGET_FLOAT" "fdiv\\t%<s>0, %<s>1, %<s>2" [(set_attr "v8type" "fdiv") + (set_attr "type" "fdiv<s>") (set_attr "mode" "<MODE>")] ) @@ -3557,6 +3893,7 @@ "TARGET_FLOAT" "fneg\\t%<s>0, %<s>1" [(set_attr "v8type" "ffarith") + (set_attr "type" "ffarith<s>") (set_attr "mode" "<MODE>")] ) @@ -3566,6 +3903,7 @@ "TARGET_FLOAT" "fsqrt\\t%<s>0, %<s>1" [(set_attr "v8type" "fsqrt") + (set_attr "type" "fsqrt<s>") (set_attr "mode" "<MODE>")] ) @@ -3575,6 +3913,7 @@ "TARGET_FLOAT" "fabs\\t%<s>0, %<s>1" [(set_attr "v8type" "ffarith") + (set_attr "type" "ffarith<s>") (set_attr "mode" "<MODE>")] ) @@ -3589,6 +3928,7 @@ "TARGET_FLOAT" "fmaxnm\\t%<s>0, %<s>1, %<s>2" [(set_attr "v8type" "fminmax") + (set_attr "type" "f_minmax<s>") (set_attr "mode" "<MODE>")] ) @@ -3599,27 +3939,7 @@ "TARGET_FLOAT" "fminnm\\t%<s>0, %<s>1, %<s>2" [(set_attr "v8type" "fminmax") - (set_attr "mode" "<MODE>")] -) - -(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" - [(set (match_operand:GPF 0 "register_operand" "=w") - (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] - FRECP))] - "TARGET_FLOAT" - "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" - [(set_attr "v8type" "frecp<FRECP:frecp_suffix>") - (set_attr "mode" "<MODE>")] -) - -(define_insn "aarch64_frecps<mode>" - [(set (match_operand:GPF 0 "register_operand" "=w") - (unspec:GPF [(match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w")] - UNSPEC_FRECPS))] - "TARGET_FLOAT" - "frecps\\t%<s>0, %<s>1, %<s>2" - [(set_attr "v8type" "frecps") + (set_attr "type" "f_minmax<s>") (set_attr "mode" "<MODE>")] ) @@ -3685,6 +4005,7 @@ "reload_completed || reload_in_progress" "fmov\\t%x0, %d1" [(set_attr "v8type" "fmovf2i") + (set_attr "type" "f_mrc") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3697,6 +4018,7 @@ "reload_completed || reload_in_progress" "fmov\\t%x0, %1.d[1]" [(set_attr "v8type" "fmovf2i") + (set_attr "type" "f_mrc") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3708,6 +4030,7 @@ 
"reload_completed || reload_in_progress" "fmov\\t%0.d[1], %x1" [(set_attr "v8type" "fmovi2f") + (set_attr "type" "f_mcr") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3718,6 +4041,7 @@ "reload_completed || reload_in_progress" "fmov\\t%d0, %x1" [(set_attr "v8type" "fmovi2f") + (set_attr "type" "f_mcr") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3729,6 +4053,7 @@ "reload_completed || reload_in_progress" "fmov\\t%d0, %d1" [(set_attr "v8type" "fmovi2f") + (set_attr "type" "f_mcr") (set_attr "mode" "DI") (set_attr "length" "4") ]) @@ -3761,6 +4086,7 @@ "" "add\\t%<w>0, %<w>1, :lo12:%a2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "<MODE>")] ) @@ -3773,6 +4099,7 @@ "" "ldr\\t%<w>0, [%1, #:got_lo12:%a2]" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "<MODE>")] ) @@ -3786,6 +4113,7 @@ "TARGET_ILP32" "ldr\\t%w0, [%1, #:got_lo12:%a2]" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "DI")] ) @@ -3796,6 +4124,7 @@ "" "ldr\\t%0, %L1" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "DI")] ) @@ -3805,6 +4134,7 @@ "" "mrs\\t%0, tpidr_el0" [(set_attr "v8type" "mrs") + (set_attr "type" "mrs") (set_attr "mode" "DI")] ) @@ -3830,6 +4160,7 @@ "" "adrp\\tx0, %A1\;add\\tx0, x0, %L1\;bl\\t%2\;nop" [(set_attr "v8type" "call") + (set_attr "type" "call") (set_attr "length" "16")]) (define_insn "tlsie_small" @@ -3839,6 +4170,7 @@ "" "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" [(set_attr "v8type" "load1") + (set_attr "type" "load1") (set_attr "mode" "DI") (set_attr "length" "8")] ) @@ -3851,6 +4183,7 @@ "" "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" [(set_attr "v8type" "alu") + (set_attr "type" "alu_reg") (set_attr "mode" "DI") (set_attr "length" "8")] ) @@ -3864,6 +4197,7 @@ "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" [(set_attr "v8type" "call") + (set_attr "type" "call") (set_attr "length" "16")]) (define_insn "stack_tie" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 99cf123e29e..521b7e817e6 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -29,6 +29,9 @@ #include <stdint.h> +#define __AARCH64_UINT64_C(__C) ((uint64_t) __C) +#define __AARCH64_INT64_C(__C) ((int64_t) __C) + typedef __builtin_aarch64_simd_qi int8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_aarch64_simd_hi int16x4_t @@ -446,7 +449,167 @@ typedef struct poly16x8x4_t poly16x8_t val[4]; } poly16x8x4_t; - +/* vget_lane internal macros. 
*/ + +#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \ + (__cast_ret \ + __builtin_aarch64_get_lane##__size (__cast_a __a, __b)) + +#define __aarch64_vget_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v2sf, , , __a, __b) +#define __aarch64_vget_lane_f64(__a, __b) (__a) + +#define __aarch64_vget_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b) + +#define __aarch64_vget_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, , ,__a, __b) +#define __aarch64_vget_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, , ,__a, __b) +#define __aarch64_vget_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v2si, , ,__a, __b) +#define __aarch64_vget_lane_s64(__a, __b) (__a) + +#define __aarch64_vget_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b) +#define __aarch64_vget_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b) +#define __aarch64_vget_lane_u64(__a, __b) (__a) + +#define __aarch64_vgetq_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v4sf, , , __a, __b) +#define __aarch64_vgetq_lane_f64(__a, __b) \ + __aarch64_vget_lane_any (v2df, , , __a, __b) + +#define __aarch64_vgetq_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b) + +#define __aarch64_vgetq_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, , ,__a, __b) +#define __aarch64_vgetq_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, , ,__a, __b) +#define __aarch64_vgetq_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v4si, , ,__a, __b) +#define __aarch64_vgetq_lane_s64(__a, __b) \ + __aarch64_vget_lane_any (v2di, , ,__a, __b) + +#define __aarch64_vgetq_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b) +#define __aarch64_vgetq_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b) +#define __aarch64_vgetq_lane_u64(__a, __b) \ + __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b) + +/* __aarch64_vdup_lane internal macros. 
*/ +#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \ + vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b)) + +#define __aarch64_vdup_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , , __a, __b) +#define __aarch64_vdup_lane_f64(__a, __b) (__a) +#define __aarch64_vdup_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , , __a, __b) +#define __aarch64_vdup_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , , __a, __b) +#define __aarch64_vdup_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , , __a, __b) +#define __aarch64_vdup_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , , __a, __b) +#define __aarch64_vdup_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , , __a, __b) +#define __aarch64_vdup_lane_s64(__a, __b) (__a) +#define __aarch64_vdup_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , , __a, __b) +#define __aarch64_vdup_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , , __a, __b) +#define __aarch64_vdup_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , , __a, __b) +#define __aarch64_vdup_lane_u64(__a, __b) (__a) + +/* __aarch64_vdup_laneq internal macros. */ +#define __aarch64_vdup_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , q, __a, __b) +#define __aarch64_vdup_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, , q, __a, __b) +#define __aarch64_vdup_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , q, __a, __b) +#define __aarch64_vdup_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , q, __a, __b) +#define __aarch64_vdup_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , q, __a, __b) +#define __aarch64_vdup_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , q, __a, __b) +#define __aarch64_vdup_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , q, __a, __b) +#define __aarch64_vdup_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, , q, __a, __b) +#define __aarch64_vdup_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , q, __a, __b) +#define __aarch64_vdup_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , q, __a, __b) +#define __aarch64_vdup_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , q, __a, __b) +#define __aarch64_vdup_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, , q, __a, __b) + +/* __aarch64_vdupq_lane internal macros. */ +#define __aarch64_vdupq_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, , __a, __b) +#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a)) +#define __aarch64_vdupq_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, , __a, __b) +#define __aarch64_vdupq_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, , __a, __b) +#define __aarch64_vdupq_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, , __a, __b) +#define __aarch64_vdupq_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, , __a, __b) +#define __aarch64_vdupq_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, , __a, __b) +#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a)) +#define __aarch64_vdupq_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, , __a, __b) +#define __aarch64_vdupq_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, , __a, __b) +#define __aarch64_vdupq_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, , __a, __b) +#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a)) + +/* __aarch64_vdupq_laneq internal macros. 
*/ +#define __aarch64_vdupq_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, q, q, __a, __b) + +/* vadd */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vadd_s8 (int8x8_t __a, int8x8_t __b) { @@ -2307,155 +2470,156 @@ vcreate_p16 (uint64_t __a) return (poly16x4_t) __a; } +/* vget_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vget_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + __extension__ static __inline int8_t __attribute__ ((__always_inline__)) vget_lane_s8 (int8x8_t __a, const int __b) { - return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b); + return __aarch64_vget_lane_s8 (__a, __b); } __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vget_lane_s16 (int16x4_t __a, const int __b) { - return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b); + return __aarch64_vget_lane_s16 (__a, __b); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) vget_lane_s32 (int32x2_t __a, const int __b) { - return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b); + return __aarch64_vget_lane_s32 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vget_lane_f32 (float32x2_t __a, const int __b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) { - return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b); + return __aarch64_vget_lane_s64 (__a, __b); } __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) vget_lane_u8 (uint8x8_t __a, const int __b) { - return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a, - __b); + return __aarch64_vget_lane_u8 (__a, __b); } __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) vget_lane_u16 (uint16x4_t __a, const int __b) { - return (uint16_t) 
__builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a, - __b); + return __aarch64_vget_lane_u16 (__a, __b); } __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vget_lane_u32 (uint32x2_t __a, const int __b) { - return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a, - __b); + return __aarch64_vget_lane_u32 (__a, __b); } -__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -vget_lane_p8 (poly8x8_t __a, const int __b) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) { - return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a, - __b); + return __aarch64_vget_lane_u64 (__a, __b); } -__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -vget_lane_p16 (poly16x4_t __a, const int __b) +/* vgetq_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) { - return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a, - __b); + return __aarch64_vgetq_lane_f32 (__a, __b); } -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vget_lane_s64 (int64x1_t __a, const int __b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vgetq_lane_f64 (float64x2_t __a, const int __b) { - return (int64_t) __builtin_aarch64_get_lanedi (__a, __b); + return __aarch64_vgetq_lane_f64 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vget_lane_u64 (uint64x1_t __a, const int __b) +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) { - return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b); + return __aarch64_vgetq_lane_p16 (__a, __b); } __extension__ static __inline int8_t __attribute__ ((__always_inline__)) vgetq_lane_s8 (int8x16_t __a, const int __b) { - return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b); + return __aarch64_vgetq_lane_s8 (__a, __b); } __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vgetq_lane_s16 (int16x8_t __a, const int __b) { - return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b); + return __aarch64_vgetq_lane_s16 (__a, __b); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) vgetq_lane_s32 (int32x4_t __a, const int __b) { - return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b); + return __aarch64_vgetq_lane_s32 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vgetq_lane_f32 (float32x4_t __a, const int __b) -{ - return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b); -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vgetq_lane_f64 (float64x2_t __a, const int __b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) { - return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b); + return __aarch64_vgetq_lane_s64 (__a, __b); } __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) vgetq_lane_u8 (uint8x16_t __a, const int __b) { - return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a, - __b); + return 
__aarch64_vgetq_lane_u8 (__a, __b); } __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) vgetq_lane_u16 (uint16x8_t __a, const int __b) { - return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a, - __b); + return __aarch64_vgetq_lane_u16 (__a, __b); } __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vgetq_lane_u32 (uint32x4_t __a, const int __b) { - return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a, - __b); -} - -__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -vgetq_lane_p8 (poly8x16_t __a, const int __b) -{ - return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a, - __b); -} - -__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -vgetq_lane_p16 (poly16x8_t __a, const int __b) -{ - return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a, - __b); -} - -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vgetq_lane_s64 (int64x2_t __a, const int __b) -{ - return __builtin_aarch64_get_lane_unsignedv2di (__a, __b); + return __aarch64_vgetq_lane_u32 (__a, __b); } __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) vgetq_lane_u64 (uint64x2_t __a, const int __b) { - return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a, - __b); + return __aarch64_vgetq_lane_u64 (__a, __b); } +/* vreinterpret */ + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s8 (int8x8_t __a) { @@ -3805,6 +3969,85 @@ vreinterpretq_u32_p16 (poly16x8_t __a) return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); } +#define __GET_LOW(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64_t lo = vgetq_lane_u64 (tmp, 0); \ + return vreinterpret_##__TYPE##_u64 (lo); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + __GET_LOW (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_low_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + __GET_LOW (p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + __GET_LOW (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + __GET_LOW (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + __GET_LOW (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + __GET_LOW (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_low_s64 (int64x2_t __a) +{ + return vgetq_lane_s64 (__a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + __GET_LOW (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + __GET_LOW (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + __GET_LOW (u32); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 (__a, 0); +} + 
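A minimal sketch (illustration only, not part of the patch) of what the __GET_LOW helper above produces: __GET_LOW (u8) mirrors the macro body exactly, viewing the 128-bit vector as two 64-bit lanes, extracting lane 0, and reinterpreting it back; the name example_vget_low_u8 is purely illustrative, the real intrinsic being vget_low_u8 above.

/* Illustrative expansion of __GET_LOW (u8) only.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
example_vget_low_u8 (uint8x16_t __a)
{
  uint64x2_t tmp = vreinterpretq_u64_u8 (__a); /* view the Q register as two 64-bit lanes */
  uint64_t lo = vgetq_lane_u64 (tmp, 0);       /* take the low 64 bits */
  return vreinterpret_u8_u64 (lo);             /* reinterpret them as a uint8x8_t */
}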
+#undef __GET_LOW + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -5534,559 +5777,6 @@ vcvtxd_f32_f64 (float64_t a) return result; } -#define vdup_lane_f32(a, b) \ - __extension__ \ - ({ \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("dup %0.2s,%1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_p8(a, b) \ - __extension__ \ - ({ \ - poly8x8_t a_ = (a); \ - poly8x8_t result; \ - __asm__ ("dup %0.8b,%1.b[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_p16(a, b) \ - __extension__ \ - ({ \ - poly16x4_t a_ = (a); \ - poly16x4_t result; \ - __asm__ ("dup %0.4h,%1.h[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_s8(a, b) \ - __extension__ \ - ({ \ - int8x8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("dup %0.8b,%1.b[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_s16(a, b) \ - __extension__ \ - ({ \ - int16x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("dup %0.4h,%1.h[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_s32(a, b) \ - __extension__ \ - ({ \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("dup %0.2s,%1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_s64(a, b) \ - __extension__ \ - ({ \ - int64x1_t a_ = (a); \ - int64x1_t result; \ - __asm__ ("ins %0.d[0],%1.d[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_u8(a, b) \ - __extension__ \ - ({ \ - uint8x8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("dup %0.8b,%1.b[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_u16(a, b) \ - __extension__ \ - ({ \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("dup %0.4h,%1.h[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_u32(a, b) \ - __extension__ \ - ({ \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("dup %0.2s,%1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdup_lane_u64(a, b) \ - __extension__ \ - ({ \ - uint64x1_t a_ = (a); \ - uint64x1_t result; \ - __asm__ ("ins %0.d[0],%1.d[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vdup_n_f32 (float32_t a) -{ - float32x2_t result; - __asm__ ("dup %0.2s, %w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vdup_n_p8 (uint32_t a) -{ - poly8x8_t result; - __asm__ ("dup %0.8b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vdup_n_p16 (uint32_t a) -{ - poly16x4_t result; - __asm__ ("dup %0.4h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vdup_n_s8 (int32_t a) -{ - int8x8_t result; - __asm__ ("dup %0.8b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - 
return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vdup_n_s16 (int32_t a) -{ - int16x4_t result; - __asm__ ("dup %0.4h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vdup_n_s32 (int32_t a) -{ - int32x2_t result; - __asm__ ("dup %0.2s,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vdup_n_s64 (int64_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0],%x1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vdup_n_u8 (uint32_t a) -{ - uint8x8_t result; - __asm__ ("dup %0.8b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vdup_n_u16 (uint32_t a) -{ - uint16x4_t result; - __asm__ ("dup %0.4h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vdup_n_u32 (uint32_t a) -{ - uint32x2_t result; - __asm__ ("dup %0.2s,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vdup_n_u64 (uint64_t a) -{ - uint64x1_t result; - __asm__ ("ins %0.d[0],%x1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -#define vdupd_lane_f64(a, b) \ - __extension__ \ - ({ \ - float64x2_t a_ = (a); \ - float64_t result; \ - __asm__ ("dup %d0, %1.d[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_f32(a, b) \ - __extension__ \ - ({ \ - float32x2_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("dup %0.4s,%1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_f64(a, b) \ - __extension__ \ - ({ \ - float64x1_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("dup %0.2d,%1.d[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_p8(a, b) \ - __extension__ \ - ({ \ - poly8x8_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("dup %0.16b,%1.b[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_p16(a, b) \ - __extension__ \ - ({ \ - poly16x4_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("dup %0.8h,%1.h[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_s8(a, b) \ - __extension__ \ - ({ \ - int8x8_t a_ = (a); \ - int8x16_t result; \ - __asm__ ("dup %0.16b,%1.b[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_s16(a, b) \ - __extension__ \ - ({ \ - int16x4_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("dup %0.8h,%1.h[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_s32(a, b) \ - __extension__ \ - ({ \ - int32x2_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("dup %0.4s,%1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_s64(a, b) \ - __extension__ \ - ({ \ - int64x1_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("dup %0.2d,%1.d[%2]" \ - : 
"=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_u8(a, b) \ - __extension__ \ - ({ \ - uint8x8_t a_ = (a); \ - uint8x16_t result; \ - __asm__ ("dup %0.16b,%1.b[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_u16(a, b) \ - __extension__ \ - ({ \ - uint16x4_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("dup %0.8h,%1.h[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_u32(a, b) \ - __extension__ \ - ({ \ - uint32x2_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("dup %0.4s,%1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vdupq_lane_u64(a, b) \ - __extension__ \ - ({ \ - uint64x1_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("dup %0.2d,%1.d[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vdupq_n_f32 (float32_t a) -{ - float32x4_t result; - __asm__ ("dup %0.4s, %w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vdupq_n_f64 (float64_t a) -{ - float64x2_t result; - __asm__ ("dup %0.2d, %x1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vdupq_n_p8 (uint32_t a) -{ - poly8x16_t result; - __asm__ ("dup %0.16b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vdupq_n_p16 (uint32_t a) -{ - poly16x8_t result; - __asm__ ("dup %0.8h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vdupq_n_s8 (int32_t a) -{ - int8x16_t result; - __asm__ ("dup %0.16b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vdupq_n_s16 (int32_t a) -{ - int16x8_t result; - __asm__ ("dup %0.8h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vdupq_n_s32 (int32_t a) -{ - int32x4_t result; - __asm__ ("dup %0.4s,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vdupq_n_s64 (int64_t a) -{ - int64x2_t result; - __asm__ ("dup %0.2d,%x1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vdupq_n_u8 (uint32_t a) -{ - uint8x16_t result; - __asm__ ("dup %0.16b,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vdupq_n_u16 (uint32_t a) -{ - uint16x8_t result; - __asm__ ("dup %0.8h,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vdupq_n_u32 (uint32_t a) -{ - uint32x4_t result; - __asm__ ("dup %0.4s,%w1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ 
((__always_inline__)) -vdupq_n_u64 (uint64_t a) -{ - uint64x2_t result; - __asm__ ("dup %0.2d,%x1" - : "=w"(result) - : "r"(a) - : /* No clobbers */); - return result; -} - -#define vdups_lane_f32(a, b) \ - __extension__ \ - ({ \ - float32x4_t a_ = (a); \ - float32_t result; \ - __asm__ ("dup %s0, %1.s[%2]" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - #define vext_f32(a, b, c) \ __extension__ \ ({ \ @@ -6724,150 +6414,6 @@ vget_high_u64 (uint64x2_t a) return result; } -#define vget_lane_f64(a, b) \ - __extension__ \ - ({ \ - float64x1_t a_ = (a); \ - float64_t result; \ - __asm__ ("umov %x0, %1.d[%2]" \ - : "=r"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vget_low_f32 (float32x4_t a) -{ - float32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vget_low_f64 (float64x2_t a) -{ - float64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vget_low_p8 (poly8x16_t a) -{ - poly8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vget_low_p16 (poly16x8_t a) -{ - poly16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vget_low_s8 (int8x16_t a) -{ - int8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vget_low_s16 (int16x8_t a) -{ - int16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vget_low_s32 (int32x4_t a) -{ - int32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vget_low_s64 (int64x2_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vget_low_u8 (uint8x16_t a) -{ - uint8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vget_low_u16 (uint16x8_t a) -{ - uint16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vget_low_u32 (uint32x4_t a) -{ - uint32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vget_low_u64 (uint64x2_t a) -{ - uint64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* 
No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vhsub_s8 (int8x8_t a, int8x8_t b) { @@ -7600,7 +7146,7 @@ vld1q_dup_u64 (const uint64_t * a) int16x4_t result; \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7628,7 +7174,7 @@ vld1q_dup_u64 (const uint64_t * a) uint16x4_t result; \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7656,7 +7202,7 @@ vld1q_dup_u64 (const uint64_t * a) int16x4_t result; \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7684,7 +7230,7 @@ vld1q_dup_u64 (const uint64_t * a) uint16x4_t result; \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7721,7 +7267,7 @@ vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) int16x4_t result; __asm__ ("mla %0.4h,%2.4h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -7743,7 +7289,7 @@ vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) uint16x4_t result; __asm__ ("mla %0.4h,%2.4h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -7834,7 +7380,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) int32x4_t result; \ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7862,7 +7408,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) uint32x4_t result; \ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7890,7 +7436,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) int32x4_t result; \ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7918,7 +7464,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) uint32x4_t result; \ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -7943,7 +7489,7 @@ vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) int32x4_t result; __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -7965,7 +7511,7 @@ vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) uint32x4_t result; __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8056,7 +7602,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) int32x4_t result; \ __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8084,7 +7630,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) 
uint32x4_t result; \ __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8112,7 +7658,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) int32x4_t result; \ __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8140,7 +7686,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) uint32x4_t result; \ __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8165,7 +7711,7 @@ vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) int32x4_t result; __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8187,7 +7733,7 @@ vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) uint32x4_t result; __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8293,7 +7839,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) int16x8_t result; \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8321,7 +7867,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) uint16x8_t result; \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8349,7 +7895,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) int16x8_t result; \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8377,7 +7923,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) uint16x8_t result; \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8426,7 +7972,7 @@ vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) int16x8_t result; __asm__ ("mla %0.8h,%2.8h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8448,7 +7994,7 @@ vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) uint16x8_t result; __asm__ ("mla %0.8h,%2.8h,%3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8554,7 +8100,7 @@ vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) int16x4_t result; \ __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8582,7 +8128,7 @@ vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) uint16x4_t result; \ __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8619,7 +8165,7 @@ vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) int16x4_t result; __asm__ ("mls %0.4h, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8641,7 
+8187,7 @@ vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) uint16x4_t result; __asm__ ("mls %0.4h, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8732,7 +8278,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) int32x4_t result; \ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8760,7 +8306,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) uint32x4_t result; \ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8788,7 +8334,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) int32x4_t result; \ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8816,7 +8362,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) uint32x4_t result; \ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8841,7 +8387,7 @@ vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) int32x4_t result; __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8863,7 +8409,7 @@ vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) uint32x4_t result; __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -8954,7 +8500,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) int32x4_t result; \ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -8982,7 +8528,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) uint32x4_t result; \ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -9010,7 +8556,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) int32x4_t result; \ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -9038,7 +8584,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) uint32x4_t result; \ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -9063,7 +8609,7 @@ vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) int32x4_t result; __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -9085,7 +8631,7 @@ vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) uint32x4_t result; __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -9191,7 +8737,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) int16x8_t result; \ __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), 
"w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -9219,7 +8765,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) uint16x8_t result; \ __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) @@ -9262,7 +8808,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) int16x8_t __result; \ __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \ : "=w"(__result) \ - : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ + : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \ : /* No clobbers */); \ __result; \ }) @@ -9290,7 +8836,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) uint16x8_t __result; \ __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \ : "=w"(__result) \ - : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ + : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \ : /* No clobbers */); \ __result; \ }) @@ -9328,7 +8874,7 @@ vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) float64x2_t t1; __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -9339,7 +8885,7 @@ vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) int16x8_t result; __asm__ ("mls %0.8h, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -9361,7 +8907,7 @@ vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) uint16x8_t result; __asm__ ("mls %0.8h, %2.8h, %3.h[0]" : "=w"(result) - : "0"(a), "w"(b), "w"(c) + : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } @@ -9699,7 +9245,7 @@ vmovl_u32 (uint32x2_t a) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vmovn_high_s16 (int8x8_t a, int16x8_t b) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.16b,%1.8h" : "+w"(result) : "w"(b) @@ -9710,7 +9256,7 @@ vmovn_high_s16 (int8x8_t a, int16x8_t b) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vmovn_high_s32 (int16x4_t a, int32x4_t b) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.8h,%1.4s" : "+w"(result) : "w"(b) @@ -9721,7 +9267,7 @@ vmovn_high_s32 (int16x4_t a, int32x4_t b) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vmovn_high_s64 (int32x2_t a, int64x2_t b) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.4s,%1.2d" : "+w"(result) : "w"(b) @@ -9732,7 +9278,7 @@ vmovn_high_s64 (int32x2_t a, int64x2_t b) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vmovn_high_u16 (uint8x8_t a, uint16x8_t b) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.16b,%1.8h" : "+w"(result) : "w"(b) @@ -9743,7 +9289,7 @@ vmovn_high_u16 (uint8x8_t a, uint16x8_t b) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vmovn_high_u32 (uint16x4_t a, uint32x4_t b) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C 
(0x0))); __asm__ ("xtn2 %0.8h,%1.4s" : "+w"(result) : "w"(b) @@ -9754,7 +9300,7 @@ vmovn_high_u32 (uint16x4_t a, uint32x4_t b) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vmovn_high_u64 (uint32x2_t a, uint64x2_t b) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.4s,%1.2d" : "+w"(result) : "w"(b) @@ -10102,7 +9648,7 @@ vmul_n_s16 (int16x4_t a, int16_t b) int16x4_t result; __asm__ ("mul %0.4h,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -10124,7 +9670,7 @@ vmul_n_u16 (uint16x4_t a, uint16_t b) uint16x4_t result; __asm__ ("mul %0.4h,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -10161,7 +9707,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) int32x4_t result; \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10187,7 +9733,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) uint32x4_t result; \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10213,7 +9759,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) int32x4_t result; \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10239,7 +9785,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) uint32x4_t result; \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10263,7 +9809,7 @@ vmull_high_n_s16 (int16x8_t a, int16_t b) int32x4_t result; __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -10285,7 +9831,7 @@ vmull_high_n_u16 (uint16x8_t a, uint16_t b) uint32x4_t result; __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -10386,7 +9932,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) int32x4_t result; \ __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10412,7 +9958,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) uint32x4_t result; \ __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10438,7 +9984,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) int32x4_t result; \ __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10464,7 +10010,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) uint32x4_t result; \ __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10488,7 +10034,7 @@ vmull_n_s16 (int16x4_t a, int16_t b) int32x4_t result; __asm__ ("smull %0.4s,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -10510,7 +10056,7 @@ vmull_n_u16 (uint16x4_t a, uint16_t b) uint32x4_t result; __asm__ ("umull 
%0.4s,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -10637,7 +10183,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) int16x8_t result; \ __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10663,7 +10209,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) uint16x8_t result; \ __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10715,7 +10261,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) int16x8_t result; \ __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10741,7 +10287,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) uint16x8_t result; \ __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ + : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) @@ -10787,7 +10333,7 @@ vmulq_n_s16 (int16x8_t a, int16_t b) int16x8_t result; __asm__ ("mul %0.8h,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -10809,7 +10355,7 @@ vmulq_n_u16 (uint16x8_t a, uint16_t b) uint16x8_t result; __asm__ ("mul %0.8h,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -12173,7 +11719,7 @@ vqdmulhq_n_s32 (int32x4_t a, int32_t b) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vqmovn_high_s16 (int8x8_t a, int16x8_t b) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.16b, %1.8h" : "+w"(result) : "w"(b) @@ -12184,7 +11730,7 @@ vqmovn_high_s16 (int8x8_t a, int16x8_t b) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vqmovn_high_s32 (int16x4_t a, int32x4_t b) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.8h, %1.4s" : "+w"(result) : "w"(b) @@ -12195,7 +11741,7 @@ vqmovn_high_s32 (int16x4_t a, int32x4_t b) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqmovn_high_s64 (int32x2_t a, int64x2_t b) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.4s, %1.2d" : "+w"(result) : "w"(b) @@ -12206,7 +11752,7 @@ vqmovn_high_s64 (int32x2_t a, int64x2_t b) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.16b, %1.8h" : "+w"(result) : "w"(b) @@ -12217,7 +11763,7 @@ vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.8h, %1.4s" : "+w"(result) : "w"(b) @@ -12228,7 +11774,7 @@ vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) __extension__ static __inline uint32x4_t 
__attribute__ ((__always_inline__)) vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.4s, %1.2d" : "+w"(result) : "w"(b) @@ -12239,7 +11785,7 @@ vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqmovun_high_s16 (uint8x8_t a, int16x8_t b) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.16b, %1.8h" : "+w"(result) : "w"(b) @@ -12250,7 +11796,7 @@ vqmovun_high_s16 (uint8x8_t a, int16x8_t b) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqmovun_high_s32 (uint16x4_t a, int32x4_t b) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.8h, %1.4s" : "+w"(result) : "w"(b) @@ -12261,7 +11807,7 @@ vqmovun_high_s32 (uint16x4_t a, int32x4_t b) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqmovun_high_s64 (uint32x2_t a, int64x2_t b) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.4s, %1.2d" : "+w"(result) : "w"(b) @@ -12275,7 +11821,7 @@ vqrdmulh_n_s16 (int16x4_t a, int16_t b) int16x4_t result; __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -12297,7 +11843,7 @@ vqrdmulhq_n_s16 (int16x8_t a, int16_t b) int16x8_t result; __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" : "=w"(result) - : "w"(a), "w"(b) + : "w"(a), "x"(b) : /* No clobbers */); return result; } @@ -12319,7 +11865,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12333,7 +11880,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12347,7 +11895,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12361,7 +11910,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12375,7 +11925,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12389,7 +11940,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) uint64x2_t 
b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12403,7 +11955,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12417,7 +11970,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12431,7 +11985,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12445,7 +12000,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12459,7 +12015,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12473,7 +12030,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12487,7 +12045,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12501,7 +12060,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12515,7 +12075,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12529,7 +12090,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12543,7 +12105,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 
(UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -12557,7 +12120,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) int64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13033,7 +12597,8 @@ vrev64q_u32 (uint32x4_t a) int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13047,7 +12612,8 @@ vrev64q_u32 (uint32x4_t a) int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13061,7 +12627,8 @@ vrev64q_u32 (uint32x4_t a) int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13075,7 +12642,8 @@ vrev64q_u32 (uint32x4_t a) uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13089,7 +12657,8 @@ vrev64q_u32 (uint32x4_t a) uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13103,7 +12672,8 @@ vrev64q_u32 (uint32x4_t a) uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13340,7 +12910,7 @@ vrsrtsq_f64 (float64x2_t a, float64x2_t b) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -13351,7 +12921,7 @@ vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -13362,7 +12932,7 @@ vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -13373,7 +12943,7 
@@ vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -13384,7 +12954,7 @@ vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -13395,7 +12965,7 @@ vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -13787,7 +13357,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13801,7 +13372,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13815,7 +13387,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13829,7 +13402,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13843,7 +13417,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -13857,7 +13432,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14309,7 +13885,7 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" : 
"+w"(result) : "w"(b), "w"(c) @@ -14320,7 +13896,7 @@ vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -14331,7 +13907,7 @@ vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -14342,7 +13918,7 @@ vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -14353,7 +13929,7 @@ vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -14364,7 +13940,7 @@ vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -16397,7 +15973,7 @@ vqtbl1_p8 (poly8x16_t a, uint8x8_t b) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl1_s8 (int8x16_t a, int8x8_t b) +vqtbl1_s8 (int8x16_t a, uint8x8_t b) { int8x8_t result; __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" @@ -16430,7 +16006,7 @@ vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl1q_s8 (int8x16_t a, int8x16_t b) +vqtbl1q_s8 (int8x16_t a, uint8x16_t b) { int8x16_t result; __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" @@ -16452,7 +16028,7 @@ vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl2_s8 (int8x16x2_t tab, int8x8_t idx) +vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) { int8x8_t result; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16488,7 +16064,7 @@ vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_s8 (int8x16x2_t tab, int8x16_t idx) +vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) { int8x16_t result; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16524,7 +16100,7 @@ vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl3_s8 (int8x16x3_t tab, int8x8_t idx) +vqtbl3_s8 
(int8x16x3_t tab, uint8x8_t idx) { int8x8_t result; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16560,7 +16136,7 @@ vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_s8 (int8x16x3_t tab, int8x16_t idx) +vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) { int8x16_t result; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16596,7 +16172,7 @@ vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl4_s8 (int8x16x4_t tab, int8x8_t idx) +vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) { int8x8_t result; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" @@ -16633,7 +16209,7 @@ vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_s8 (int8x16x4_t tab, int8x16_t idx) +vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) { int8x16_t result; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" @@ -16670,7 +16246,7 @@ vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx1_s8 (int8x8_t r, int8x16_t tab, int8x8_t idx) +vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("tbx %0.8b,{%1.16b},%2.8b" @@ -16703,7 +16279,7 @@ vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx1q_s8 (int8x16_t r, int8x16_t tab, int8x16_t idx) +vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("tbx %0.16b,{%1.16b},%2.16b" @@ -16736,7 +16312,7 @@ vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, int8x8_t idx) +vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16773,7 +16349,7 @@ vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, int8x16_t idx) +vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" @@ -16810,7 +16386,7 @@ vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, int8x8_t idx) +vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16847,7 +16423,7 @@ vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, int8x16_t idx) +vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" @@ -16884,7 +16460,7 @@ vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, int8x8_t idx) +vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" @@ -16921,7 +16497,7 @@ vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, int8x16_t idx) +vqtbx4q_s8 
(int8x16_t r, int8x16x4_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" @@ -16962,7 +16538,7 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vtbl1_s8 (int8x8_t tab, int8x8_t idx) { int8x8_t result; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) @@ -16974,7 +16550,7 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) { uint8x8_t result; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) @@ -16986,7 +16562,7 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) { poly8x8_t result; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); + poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) @@ -17036,7 +16612,7 @@ vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) int8x8_t result; int8x16x2_t temp; temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" : "=w"(result) @@ -17051,7 +16627,7 @@ vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) uint8x8_t result; uint8x16x2_t temp; temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" : "=w"(result) @@ -17066,7 +16642,7 @@ vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) poly8x8_t result; poly8x16x2_t temp; temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" : "=w"(result) @@ -17125,7 +16701,7 @@ vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx) { int8x8_t result; int8x8_t tmp1; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("movi %0.8b, 8\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" @@ -17141,7 +16717,7 @@ vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx) { uint8x8_t result; uint8x8_t tmp1; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("movi %0.8b, 8\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" @@ -17157,7 +16733,7 @@ vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx) { poly8x8_t result; poly8x8_t tmp1; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); + poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("movi %0.8b, 8\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" @@ -17211,7 +16787,7 @@ 
vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx) int8x8_t tmp1; int8x16x2_t temp; temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" "movi %0.8b, 24\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" @@ -17230,7 +16806,7 @@ vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx) uint8x8_t tmp1; uint8x16x2_t temp; temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" "movi %0.8b, 24\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" @@ -17249,7 +16825,7 @@ vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx) poly8x8_t tmp1; poly8x16x2_t temp; temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" "movi %0.8b, 24\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" @@ -17457,7 +17033,7 @@ vaddvq_s32 (int32x4_t __a) return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0); } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) vaddvq_s64 (int64x2_t __a) { return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0); @@ -17484,7 +17060,7 @@ vaddvq_u32 (uint32x4_t __a) __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0); } -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) vaddvq_u64 (uint64x2_t __a) { return vgetq_lane_u64 ((uint64x2_t) @@ -19727,54 +19303,601 @@ vcvtpq_u64_f64 (float64x2_t __a) return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a); } -/* vdup */ +/* vdup_n */ -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vdupb_lane_s8 (int8x16_t a, int const b) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_n_f32 (float32_t __a) { - return __builtin_aarch64_dup_lane_scalarv16qi (a, b); + return (float32x2_t) {__a, __a}; } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vdupb_lane_u8 (uint8x16_t a, int const b) +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_n_f64 (float64_t __a) { - return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b); + return __a; } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vduph_lane_s16 (int16x8_t a, int const b) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_n_p8 (poly8_t __a) { - return __builtin_aarch64_dup_lane_scalarv8hi (a, b); + return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vduph_lane_u16 (uint16x8_t a, int const b) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_n_p16 (poly16_t __a) { - return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b); + return (poly16x4_t) {__a, __a, __a, __a}; } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vdups_lane_s32 (int32x4_t a, int const b) +__extension__ static __inline 
int8x8_t __attribute__ ((__always_inline__)) +vdup_n_s8 (int8_t __a) { - return __builtin_aarch64_dup_lane_scalarv4si (a, b); + return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vdups_lane_u32 (uint32x4_t a, int const b) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_n_s16 (int16_t __a) +{ + return (int16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_n_s32 (int32_t __a) +{ + return (int32x2_t) {__a, __a}; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_n_s64 (int64_t __a) +{ + return __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_n_u8 (uint8_t __a) +{ + return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_n_u16 (uint16_t __a) +{ + return (uint16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_n_u32 (uint32_t __a) +{ + return (uint32x2_t) {__a, __a}; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_n_u64 (uint64_t __a) +{ + return __a; +} + +/* vdupq_n */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_n_f32 (float32_t __a) +{ + return (float32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_n_f64 (float64_t __a) +{ + return (float64x2_t) {__a, __a}; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_n_p8 (uint32_t __a) +{ + return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_n_p16 (uint32_t __a) +{ + return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_n_s8 (int32_t __a) +{ + return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_n_s16 (int32_t __a) +{ + return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_n_s32 (int32_t __a) +{ + return (int32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_n_s64 (int64_t __a) +{ + return (int64x2_t) {__a, __a}; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_n_u8 (uint32_t __a) +{ + return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_n_u16 (uint32_t __a) +{ + return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_n_u32 (uint32_t __a) +{ + return (uint32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_n_u64 (uint64_t __a) { - return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b); + return (uint64x2_t) {__a, __a}; +} + +/* vdup_lane */ 
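The vdup_n_*/vdupq_n_* definitions added above return plain vector literals rather than going through an asm or builtin, which lets the compiler see the broadcast and emit a single dup/movi where possible. A minimal caller-side sketch, assuming a toolchain carrying this arm_neon.h; lane reads use the standard vget_lane/vgetq_lane intrinsics (lane indices must be compile-time constants):

  #include <arm_neon.h>
  #include <assert.h>

  /* Broadcast a scalar into every lane, then read individual lanes back.  */
  int
  test_vdup_n (void)
  {
    int16x4_t v = vdup_n_s16 (7);            /* {7, 7, 7, 7} */
    uint32x4_t q = vdupq_n_u32 (0xdeadbeef);  /* four copies of 0xdeadbeef */
    assert (vget_lane_s16 (v, 3) == 7);
    assert (vgetq_lane_u32 (q, 2) == 0xdeadbeefu);
    return 0;
  }

The vdup_lane/vdup_laneq variants that follow take a vector plus a constant lane number instead of a scalar, so vdup_lane_s16 (v, 2) broadcasts lane 2 of v into every lane of the result.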
+ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_f32 (__a, __b); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_p16 (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_s8 (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_s16 (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_s32 (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vdupd_lane_s64 (int64x2_t a, int const b) +vdup_lane_s64 (int64x1_t __a, const int __b) { - return __builtin_aarch64_dup_lane_scalarv2di (a, b); + return __aarch64_vdup_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_u32 (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vdupd_lane_u64 (uint64x2_t a, int const b) +vdup_lane_u64 (uint64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_u64 (__a, __b); +} + +/* vdup_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_f32 (__a, __b); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_f64 (__a, __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_p8 (__a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_p16 (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s8 (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s16 (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s32 (__a, __b); +} + +__extension__ static 
__inline int64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s64 (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u8 (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u16 (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u32 (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u64 (__a, __b); +} + +/* vdupq_lane */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_f32 (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_p16 (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s8 (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s16 (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s32 (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_s64 (int64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b); + return __aarch64_vdupq_lane_u64 (__a, __b); +} + +/* vdupq_laneq */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_f32 (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_f64 (__a, __b); +} + +__extension__ static __inline poly8x16_t 
__attribute__ ((__always_inline__)) +vdupq_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_p8 (__a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_p16 (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s8 (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s16 (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s32 (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s64 (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u8 (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u16 (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u32 (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u64 (__a, __b); +} + +/* vdupb_lane */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_s8 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_u8 (__a, __b); +} + +/* vduph_lane */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_s16 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_u16 (__a, __b); +} + +/* vdups_lane */ +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vdups_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vdups_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_s32 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vdups_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_u32 (__a, __b); +} + +/* vdupd_lane */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_lane_f64 (float64x1_t __a, const int __attribute__ 
((unused)) __b) +{ + return __a; +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +/* vdupb_laneq */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p8 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b) +{ + return __aarch64_vgetq_lane_s8 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u8 (__a, __b); +} + +/* vduph_laneq */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p16 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s16 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u16 (__a, __b); +} + +/* vdups_laneq */ +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vdups_laneq_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f32 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vdups_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s32 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vdups_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u32 (__a, __b); +} + +/* vdupd_laneq */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f64 (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u64 (__a, __b); } /* vld1 */ @@ -21425,7 +21548,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); } @@ -21476,7 +21599,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); } @@ -21553,7 +21676,7 
@@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); } @@ -21604,7 +21727,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); } @@ -21729,7 +21852,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); } @@ -21778,7 +21901,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); } @@ -24365,8 +24488,8 @@ vst2_s64 (int64_t * __a, int64x1x2_t val) { __builtin_aarch64_simd_oi __o; int64x2x2_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); @@ -24377,8 +24500,8 @@ vst2_u64 (uint64_t * __a, uint64x1x2_t val) { __builtin_aarch64_simd_oi __o; uint64x2x2_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); @@ -24389,8 +24512,8 @@ vst2_f64 (float64_t * __a, float64x1x2_t val) { __builtin_aarch64_simd_oi __o; float64x2x2_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2df (__o, 
(float64x2_t) temp.val[1], 1); __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); @@ -24401,8 +24524,8 @@ vst2_s8 (int8_t * __a, int8x8x2_t val) { __builtin_aarch64_simd_oi __o; int8x16x2_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24413,8 +24536,8 @@ vst2_p8 (poly8_t * __a, poly8x8x2_t val) { __builtin_aarch64_simd_oi __o; poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24425,8 +24548,8 @@ vst2_s16 (int16_t * __a, int16x4x2_t val) { __builtin_aarch64_simd_oi __o; int16x8x2_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24437,8 +24560,8 @@ vst2_p16 (poly16_t * __a, poly16x4x2_t val) { __builtin_aarch64_simd_oi __o; poly16x8x2_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24449,8 +24572,8 @@ vst2_s32 (int32_t * __a, int32x2x2_t val) { __builtin_aarch64_simd_oi __o; int32x4x2_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); @@ -24461,8 +24584,8 @@ vst2_u8 (uint8_t * __a, uint8x8x2_t val) { __builtin_aarch64_simd_oi __o; uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], 
vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24473,8 +24596,8 @@ vst2_u16 (uint16_t * __a, uint16x4x2_t val) { __builtin_aarch64_simd_oi __o; uint16x8x2_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24485,8 +24608,8 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t val) { __builtin_aarch64_simd_oi __o; uint32x4x2_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); @@ -24497,8 +24620,8 @@ vst2_f32 (float32_t * __a, float32x2x2_t val) { __builtin_aarch64_simd_oi __o; float32x4x2_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); @@ -24617,9 +24740,9 @@ vst3_s64 (int64_t * __a, int64x1x3_t val) { __builtin_aarch64_simd_ci __o; int64x2x3_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24631,9 +24754,9 @@ vst3_u64 (uint64_t * __a, uint64x1x3_t val) { __builtin_aarch64_simd_ci __o; uint64x2x3_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + 
temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24645,9 +24768,9 @@ vst3_f64 (float64_t * __a, float64x1x3_t val) { __builtin_aarch64_simd_ci __o; float64x2x3_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); @@ -24659,9 +24782,9 @@ vst3_s8 (int8_t * __a, int8x8x3_t val) { __builtin_aarch64_simd_ci __o; int8x16x3_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24673,9 +24796,9 @@ vst3_p8 (poly8_t * __a, poly8x8x3_t val) { __builtin_aarch64_simd_ci __o; poly8x16x3_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); - temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24687,9 +24810,9 @@ vst3_s16 (int16_t * __a, int16x4x3_t val) { __builtin_aarch64_simd_ci __o; int16x8x3_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24701,9 +24824,9 @@ vst3_p16 (poly16_t * __a, poly16x4x3_t val) { 
__builtin_aarch64_simd_ci __o; poly16x8x3_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); - temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24715,9 +24838,9 @@ vst3_s32 (int32_t * __a, int32x2x3_t val) { __builtin_aarch64_simd_ci __o; int32x4x3_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24729,9 +24852,9 @@ vst3_u8 (uint8_t * __a, uint8x8x3_t val) { __builtin_aarch64_simd_ci __o; uint8x16x3_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24743,9 +24866,9 @@ vst3_u16 (uint16_t * __a, uint16x4x3_t val) { __builtin_aarch64_simd_ci __o; uint16x8x3_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24757,9 +24880,9 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t val) { __builtin_aarch64_simd_ci __o; uint32x4x3_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); - temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 
(val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24771,9 +24894,9 @@ vst3_f32 (float32_t * __a, float32x2x3_t val) { __builtin_aarch64_simd_ci __o; float32x4x3_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); @@ -24905,10 +25028,10 @@ vst4_s64 (int64_t * __a, int64x1x4_t val) { __builtin_aarch64_simd_xi __o; int64x2x4_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); - temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24921,10 +25044,10 @@ vst4_u64 (uint64_t * __a, uint64x1x4_t val) { __builtin_aarch64_simd_xi __o; uint64x2x4_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); - temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24937,10 +25060,10 @@ vst4_f64 (float64_t * __a, float64x1x4_t val) { __builtin_aarch64_simd_xi __o; float64x2x4_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); - temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + 
temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); @@ -24953,10 +25076,10 @@ vst4_s8 (int8_t * __a, int8x8x4_t val) { __builtin_aarch64_simd_xi __o; int8x16x4_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); - temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24969,10 +25092,10 @@ vst4_p8 (poly8_t * __a, poly8x8x4_t val) { __builtin_aarch64_simd_xi __o; poly8x16x4_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); - temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); - temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24985,10 +25108,10 @@ vst4_s16 (int16_t * __a, int16x4x4_t val) { __builtin_aarch64_simd_xi __o; int16x8x4_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); - temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -25001,10 +25124,10 @@ vst4_p16 (poly16_t * __a, poly16x4x4_t val) { __builtin_aarch64_simd_xi __o; poly16x8x4_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); - temp.val[2] = 
vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); - temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -25017,10 +25140,10 @@ vst4_s32 (int32_t * __a, int32x2x4_t val) { __builtin_aarch64_simd_xi __o; int32x4x4_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); - temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); @@ -25033,10 +25156,10 @@ vst4_u8 (uint8_t * __a, uint8x8x4_t val) { __builtin_aarch64_simd_xi __o; uint8x16x4_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); - temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -25049,10 +25172,10 @@ vst4_u16 (uint16_t * __a, uint16x4x4_t val) { __builtin_aarch64_simd_xi __o; uint16x8x4_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); - temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -25065,10 +25188,10 @@ vst4_u32 (uint32_t * __a, 
uint32x2x4_t val) { __builtin_aarch64_simd_xi __o; uint32x4x4_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); - temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); - temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); @@ -25081,10 +25204,10 @@ vst4_f32 (float32_t * __a, float32x2x4_t val) { __builtin_aarch64_simd_xi __o; float32x4x4_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); - temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); @@ -25581,4 +25704,81 @@ __INTERLEAVE_LIST (zip) /* End of optimal implementations in approved order. 
*/ +#undef __aarch64_vget_lane_any +#undef __aarch64_vget_lane_f32 +#undef __aarch64_vget_lane_f64 +#undef __aarch64_vget_lane_p8 +#undef __aarch64_vget_lane_p16 +#undef __aarch64_vget_lane_s8 +#undef __aarch64_vget_lane_s16 +#undef __aarch64_vget_lane_s32 +#undef __aarch64_vget_lane_s64 +#undef __aarch64_vget_lane_u8 +#undef __aarch64_vget_lane_u16 +#undef __aarch64_vget_lane_u32 +#undef __aarch64_vget_lane_u64 + +#undef __aarch64_vgetq_lane_f32 +#undef __aarch64_vgetq_lane_f64 +#undef __aarch64_vgetq_lane_p8 +#undef __aarch64_vgetq_lane_p16 +#undef __aarch64_vgetq_lane_s8 +#undef __aarch64_vgetq_lane_s16 +#undef __aarch64_vgetq_lane_s32 +#undef __aarch64_vgetq_lane_s64 +#undef __aarch64_vgetq_lane_u8 +#undef __aarch64_vgetq_lane_u16 +#undef __aarch64_vgetq_lane_u32 +#undef __aarch64_vgetq_lane_u64 + +#undef __aarch64_vdup_lane_any +#undef __aarch64_vdup_lane_f32 +#undef __aarch64_vdup_lane_f64 +#undef __aarch64_vdup_lane_p8 +#undef __aarch64_vdup_lane_p16 +#undef __aarch64_vdup_lane_s8 +#undef __aarch64_vdup_lane_s16 +#undef __aarch64_vdup_lane_s32 +#undef __aarch64_vdup_lane_s64 +#undef __aarch64_vdup_lane_u8 +#undef __aarch64_vdup_lane_u16 +#undef __aarch64_vdup_lane_u32 +#undef __aarch64_vdup_lane_u64 +#undef __aarch64_vdup_laneq_f32 +#undef __aarch64_vdup_laneq_f64 +#undef __aarch64_vdup_laneq_p8 +#undef __aarch64_vdup_laneq_p16 +#undef __aarch64_vdup_laneq_s8 +#undef __aarch64_vdup_laneq_s16 +#undef __aarch64_vdup_laneq_s32 +#undef __aarch64_vdup_laneq_s64 +#undef __aarch64_vdup_laneq_u8 +#undef __aarch64_vdup_laneq_u16 +#undef __aarch64_vdup_laneq_u32 +#undef __aarch64_vdup_laneq_u64 +#undef __aarch64_vdupq_lane_f32 +#undef __aarch64_vdupq_lane_f64 +#undef __aarch64_vdupq_lane_p8 +#undef __aarch64_vdupq_lane_p16 +#undef __aarch64_vdupq_lane_s8 +#undef __aarch64_vdupq_lane_s16 +#undef __aarch64_vdupq_lane_s32 +#undef __aarch64_vdupq_lane_s64 +#undef __aarch64_vdupq_lane_u8 +#undef __aarch64_vdupq_lane_u16 +#undef __aarch64_vdupq_lane_u32 +#undef __aarch64_vdupq_lane_u64 +#undef __aarch64_vdupq_laneq_f32 +#undef __aarch64_vdupq_laneq_f64 +#undef __aarch64_vdupq_laneq_p8 +#undef __aarch64_vdupq_laneq_p16 +#undef __aarch64_vdupq_laneq_s8 +#undef __aarch64_vdupq_laneq_s16 +#undef __aarch64_vdupq_laneq_s32 +#undef __aarch64_vdupq_laneq_s64 +#undef __aarch64_vdupq_laneq_u8 +#undef __aarch64_vdupq_laneq_u16 +#undef __aarch64_vdupq_laneq_u32 +#undef __aarch64_vdupq_laneq_u64 + #endif diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 3ec889f28fd..ffe125b5583 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -134,9 +134,15 @@ ;; Vector modes except double int. (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +;; Vector modes for Q and H types. +(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) + ;; Vector modes for H and S types. (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) +;; Vector modes for Q, H and S types. +(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) + ;; Vector and scalar integer modes for H and S (define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI]) @@ -377,7 +383,7 @@ (V4HI "V8HI") (V8HI "V8HI") (V2SI "V4SI") (V4SI "V4SI") (DI "V2DI") (V2DI "V2DI") - (V2SF "V2SF") (V4SF "V4SF") + (V2SF "V4SF") (V4SF "V4SF") (V2DF "V2DF") (SI "V4SI") (HI "V8HI") (QI "V16QI")]) @@ -453,6 +459,15 @@ (V2SF "s") (V4SF "s") (V2DF "d")]) +;; Corresponding core element mode for each vector mode. This is a +;; variation on <vw> mapping FP modes to GP regs. 
+(define_mode_attr vwcore [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "w") (V4SF "w") + (V2DF "x")]) + ;; Double vector types for ALLX. (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")]) @@ -512,6 +527,20 @@ (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) +(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") + (V4HI "V8HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V2SI") + (DI "V2DI") (V2DI "DI") + (V2SF "V4SF") (V4SF "V2SF") + (DF "V2DF") (V2DF "DF")]) + +(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64") + (V4HI "to_128") (V8HI "to_64") + (V2SI "to_128") (V4SI "to_64") + (DI "to_128") (V2DI "to_64") + (V2SF "to_128") (V4SF "to_64") + (DF "to_128") (V2DF "to_64")]) + ;; ------------------------------------------------------------------- ;; Code Iterators ;; ------------------------------------------------------------------- diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 3e2b6b34357..dbc90826665 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -26,6 +26,11 @@ && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) ) +(define_predicate "aarch64_simd_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") + (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) + (define_predicate "aarch64_reg_or_zero" (and (match_code "reg,subreg,const_int") (ior (match_operand 0 "register_operand") diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 2975850dcb9..9f8d8cd6e0d 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -35,6 +35,11 @@ aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/aarch64/aarch64-builtins.c +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + comma=, MULTILIB_OPTIONS = $(patsubst %, mabi=%, $(subst $(comma), ,$(TM_MULTILIB_CONFIG))) MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 5f5b33e347b..a8fb92964eb 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -2659,6 +2659,7 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) cmp_mode = cmp_mode == DImode ? DFmode : DImode; op0 = gen_lowpart (cmp_mode, tem); op1 = CONST0_RTX (cmp_mode); + cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); local_fast_math = 1; } diff --git a/gcc/config/alpha/linux.h b/gcc/config/alpha/linux.h index fbf4a07eb45..da5842fda85 100644 --- a/gcc/config/alpha/linux.h +++ b/gcc/config/alpha/linux.h @@ -59,16 +59,18 @@ along with GCC; see the file COPYING3. If not see #ifdef SINGLE_LIBC #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) #else #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) #endif -/* Determine whether the entire c99 runtime is present in the - runtime library. 
*/ -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. */ -#define TARGET_HAS_SINCOS (OPTION_GLIBC) +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function #define TARGET_POSIX_IO diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h new file mode 100644 index 00000000000..97768fce0ca --- /dev/null +++ b/gcc/config/arm/aarch-common-protos.h @@ -0,0 +1,36 @@ +/* Function prototypes for instruction scheduling dependeoncy routines, + defined in aarch-common.c + + Copyright (C) 1991-2013 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + +#ifndef GCC_AARCH_COMMON_PROTOS_H +#define GCC_AARCH_COMMON_PROTOS_H + +extern int arm_early_load_addr_dep (rtx, rtx); +extern int arm_early_store_addr_dep (rtx, rtx); +extern int arm_mac_accumulator_is_mul_result (rtx, rtx); +extern int arm_mac_accumulator_is_result (rtx, rtx); +extern int arm_no_early_alu_shift_dep (rtx, rtx); +extern int arm_no_early_alu_shift_value_dep (rtx, rtx); +extern int arm_no_early_mul_dep (rtx, rtx); +extern int arm_no_early_store_addr_dep (rtx, rtx); + +#endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c new file mode 100644 index 00000000000..69366af9bd5 --- /dev/null +++ b/gcc/config/arm/aarch-common.c @@ -0,0 +1,278 @@ +/* Dependency checks for instruction scheduling, shared between ARM and + AARCH64. + + Copyright (C) 1991-2013 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + +/* Return nonzero if the CONSUMER instruction (a load) does need + PRODUCER's value to calculate the address. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "rtl.h" +#include "tree.h" +#include "c-family/c-common.h" +#include "rtl.h" + +int +arm_early_load_addr_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx addr = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (addr) == COND_EXEC) + addr = COND_EXEC_CODE (addr); + if (GET_CODE (addr) == PARALLEL) + { + if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN) + addr = XVECEXP (addr, 0, 1); + else + addr = XVECEXP (addr, 0, 0); + } + addr = XEXP (addr, 1); + + return reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value or amount dependency on the + result of PRODUCER. */ + +int +arm_no_early_alu_shift_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + rtx early_op; + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + early_op = XEXP (op, 0); + /* This is either an actual independent shift, or a shift applied to + the first operand of another operation. We want the whole shift + operation. */ + if (REG_P (early_op)) + early_op = op; + + return !reg_overlap_mentioned_p (value, early_op); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value dependency on the result of + PRODUCER. */ + +int +arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + rtx early_op; + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + early_op = XEXP (op, 0); + + /* This is either an actual independent shift, or a shift applied to + the first operand of another operation. We want the value being + shifted, in either case. */ + if (!REG_P (early_op)) + early_op = XEXP (early_op, 0); + + return !reg_overlap_mentioned_p (value, early_op); +} + +/* Return nonzero if the CONSUMER (a mul or mac op) does not + have an early register mult dependency on the result of + PRODUCER. 
*/ + +int +arm_no_early_mul_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) + { + if (GET_CODE (XEXP (op, 0)) == MULT) + return !reg_overlap_mentioned_p (value, XEXP (op, 0)); + else + return !reg_overlap_mentioned_p (value, XEXP (op, 1)); + } + + return 0; +} + +/* Return nonzero if the CONSUMER instruction (a store) does not need + PRODUCER's value to calculate the address. */ + +int +arm_no_early_store_addr_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx addr = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (addr) == COND_EXEC) + addr = COND_EXEC_CODE (addr); + if (GET_CODE (addr) == PARALLEL) + addr = XVECEXP (addr, 0, 0); + addr = XEXP (addr, 0); + + return !reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (a store) does need + PRODUCER's value to calculate the address. */ + +int +arm_early_store_addr_dep (rtx producer, rtx consumer) +{ + return !arm_no_early_store_addr_dep (producer, consumer); +} + +/* Return non-zero iff the consumer (a multiply-accumulate or a + multiple-subtract instruction) has an accumulator dependency on the + result of the producer and no other dependency on that result. It + does not check if the producer is multiply-accumulate instruction. */ +int +arm_mac_accumulator_is_result (rtx producer, rtx consumer) +{ + rtx result; + rtx op0, op1, acc; + + producer = PATTERN (producer); + consumer = PATTERN (consumer); + + if (GET_CODE (producer) == COND_EXEC) + producer = COND_EXEC_CODE (producer); + if (GET_CODE (consumer) == COND_EXEC) + consumer = COND_EXEC_CODE (consumer); + + if (GET_CODE (producer) != SET) + return 0; + + result = XEXP (producer, 0); + + if (GET_CODE (consumer) != SET) + return 0; + + /* Check that the consumer is of the form + (set (...) (plus (mult ...) (...))) + or + (set (...) (minus (...) (mult ...))). */ + if (GET_CODE (XEXP (consumer, 1)) == PLUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); + acc = XEXP (XEXP (consumer, 1), 1); + } + else if (GET_CODE (XEXP (consumer, 1)) == MINUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); + acc = XEXP (XEXP (consumer, 1), 0); + } + else + return 0; + + return (reg_overlap_mentioned_p (result, acc) + && !reg_overlap_mentioned_p (result, op0) + && !reg_overlap_mentioned_p (result, op1)); +} + +/* Return non-zero if the consumer (a multiply-accumulate instruction) + has an accumulator dependency on the result of the producer (a + multiplication instruction) and no other dependency on that result. 
*/ +int +arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) +{ + rtx mul = PATTERN (producer); + rtx mac = PATTERN (consumer); + rtx mul_result; + rtx mac_op0, mac_op1, mac_acc; + + if (GET_CODE (mul) == COND_EXEC) + mul = COND_EXEC_CODE (mul); + if (GET_CODE (mac) == COND_EXEC) + mac = COND_EXEC_CODE (mac); + + /* Check that mul is of the form (set (...) (mult ...)) + and mla is of the form (set (...) (plus (mult ...) (...))). */ + if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) + || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS + || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) + return 0; + + mul_result = XEXP (mul, 0); + mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); + mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); + mac_acc = XEXP (XEXP (mac, 1), 1); + + return (reg_overlap_mentioned_p (mul_result, mac_acc) + && !reg_overlap_mentioned_p (mul_result, mac_op0) + && !reg_overlap_mentioned_p (mul_result, mac_op1)); +} diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md index dc8e7ac8c14..3972a850990 100644 --- a/gcc/config/arm/arm-fixed.md +++ b/gcc/config/arm/arm-fixed.md @@ -25,7 +25,8 @@ "TARGET_32BIT" "add%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no")]) + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alu_reg")]) (define_insn "add<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -34,7 +35,8 @@ "TARGET_INT_SIMD" "sadd<qaddsub_suf>%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "usadd<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -43,7 +45,8 @@ "TARGET_INT_SIMD" "uqadd<qaddsub_suf>%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "ssadd<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -52,7 +55,8 @@ "TARGET_INT_SIMD" "qadd<qaddsub_suf>%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "sub<mode>3" [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") @@ -61,7 +65,8 @@ "TARGET_32BIT" "sub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no")]) + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alu_reg")]) (define_insn "sub<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -70,7 +75,8 @@ "TARGET_INT_SIMD" "ssub<qaddsub_suf>%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "ussub<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -80,7 +86,8 @@ "TARGET_INT_SIMD" "uqsub<qaddsub_suf>%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) (define_insn "sssub<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -89,7 +96,8 @@ "TARGET_INT_SIMD" "qsub<qaddsub_suf>%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) ;; Fractional multiplies. 
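The accumulator checks added in aarch-common.c, together with per-pattern type classifications like the "alu_reg" annotations just added in arm-fixed.md, are what the pipeline descriptions consult when deciding whether a multiply result can be forwarded straight into a following multiply-accumulate. A small, self-contained C sketch of that source pattern (the function below is illustrative only, not part of this patch):

/* Illustrative example, not taken from the patch: on ARM this typically
   compiles to a MUL producing a*b followed by an MLA computing
   x*y + (a*b).  The earlier multiply's result is consumed only as the
   MLA's accumulator operand, which is the case
   arm_mac_accumulator_is_mul_result recognises.  */
int sum_of_products (int a, int b, int x, int y)
{
  return a * b + x * y;
}
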
@@ -246,6 +254,7 @@ return ""; } [(set_attr "conds" "clob") + (set_attr "type" "multiple") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (if_then_else (match_test "arm_restrict_it") @@ -305,6 +314,7 @@ return ""; } [(set_attr "conds" "clob") + (set_attr "type" "multiple") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (if_then_else (match_test "arm_restrict_it") @@ -406,7 +416,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "1") - (set_attr "type" "arlo_shift")]) + (set_attr "type" "alu_shift_imm")]) (define_insn "arm_usatsihi" [(set (match_operand:HI 0 "s_register_operand" "=r") @@ -414,5 +424,6 @@ "TARGET_INT_SIMD" "usat%?\\t%0, #16, %1" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_imm")] ) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index ef94bbcea25..f694dfdaae2 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -97,14 +97,6 @@ extern bool arm_tls_referenced_p (rtx); extern int arm_coproc_mem_operand (rtx, bool); extern int neon_vector_mem_operand (rtx, int, bool); extern int neon_struct_mem_operand (rtx); -extern int arm_no_early_store_addr_dep (rtx, rtx); -extern int arm_early_store_addr_dep (rtx, rtx); -extern int arm_early_load_addr_dep (rtx, rtx); -extern int arm_no_early_alu_shift_dep (rtx, rtx); -extern int arm_no_early_alu_shift_value_dep (rtx, rtx); -extern int arm_no_early_mul_dep (rtx, rtx); -extern int arm_mac_accumulator_is_result (rtx, rtx); -extern int arm_mac_accumulator_is_mul_result (rtx, rtx); extern int tls_mentioned_p (rtx); extern int symbol_mentioned_p (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 294de80a73b..f9027ddd2e7 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -4544,7 +4544,9 @@ aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) { pcum->aapcs_vfp_reg_alloc = mask << regno; - if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + if (mode == BLKmode + || (mode == TImode && ! TARGET_NEON) + || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode)) { int i; int rcount = pcum->aapcs_vfp_rcount; @@ -8662,8 +8664,14 @@ xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) instruction we depend on is another ALU instruction, then we may have to account for an additional stall. 
*/ if (shift_opnum != 0 - && (attr_type == TYPE_ARLO_SHIFT - || attr_type == TYPE_ARLO_SHIFT_REG + && (attr_type == TYPE_ALU_SHIFT_IMM + || attr_type == TYPE_ALUS_SHIFT_IMM + || attr_type == TYPE_LOGIC_SHIFT_IMM + || attr_type == TYPE_LOGICS_SHIFT_IMM + || attr_type == TYPE_ALU_SHIFT_REG + || attr_type == TYPE_ALUS_SHIFT_REG + || attr_type == TYPE_LOGIC_SHIFT_REG + || attr_type == TYPE_LOGICS_SHIFT_REG || attr_type == TYPE_MOV_SHIFT || attr_type == TYPE_MVN_SHIFT || attr_type == TYPE_MOV_SHIFT_REG @@ -8950,9 +8958,17 @@ cortexa7_older_only (rtx insn) switch (get_attr_type (insn)) { - case TYPE_ARLO_REG: + case TYPE_ALU_REG: + case TYPE_ALUS_REG: + case TYPE_LOGIC_REG: + case TYPE_LOGICS_REG: + case TYPE_ADC_REG: + case TYPE_ADCS_REG: + case TYPE_ADR: + case TYPE_BFM: + case TYPE_REV: case TYPE_MVN_REG: - case TYPE_SHIFT: + case TYPE_SHIFT_IMM: case TYPE_SHIFT_REG: case TYPE_LOAD_BYTE: case TYPE_LOAD1: @@ -8961,7 +8977,7 @@ cortexa7_older_only (rtx insn) case TYPE_FADDS: case TYPE_FFARITHD: case TYPE_FADDD: - case TYPE_FCPYS: + case TYPE_FMOV: case TYPE_F_CVT: case TYPE_FCMPS: case TYPE_FCMPD: @@ -8973,7 +8989,8 @@ cortexa7_older_only (rtx insn) case TYPE_FMACD: case TYPE_FDIVS: case TYPE_FDIVD: - case TYPE_F_2_R: + case TYPE_F_MRC: + case TYPE_F_MRRC: case TYPE_F_FLAG: case TYPE_F_LOADS: case TYPE_F_STORES: @@ -8996,7 +9013,10 @@ cortexa7_younger (FILE *file, int verbose, rtx insn) switch (get_attr_type (insn)) { - case TYPE_ARLO_IMM: + case TYPE_ALU_IMM: + case TYPE_ALUS_IMM: + case TYPE_LOGIC_IMM: + case TYPE_LOGICS_IMM: case TYPE_EXTEND: case TYPE_MVN_IMM: case TYPE_MOV_IMM: @@ -14360,6 +14380,16 @@ thumb2_reorg (void) && IN_RANGE (INTVAL (op1), -7, 7)) action = CONV; } + /* ADCS <Rd>, <Rn> */ + else if (GET_CODE (XEXP (src, 0)) == PLUS + && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst) + && low_register_operand (XEXP (XEXP (src, 0), 1), + SImode) + && COMPARISON_P (op1) + && cc_register (XEXP (op1, 0), VOIDmode) + && maybe_get_arm_condition_code (op1) == ARM_CS + && XEXP (op1, 1) == const0_rtx) + action = CONV; break; case MINUS: @@ -16801,123 +16831,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, } } -/* Generate and emit a pattern that will be recognized as STRD pattern. If even - number of registers are being pushed, multiple STRD patterns are created for - all register pairs. If odd number of registers are pushed, emit a - combination of STRDs and STR for the prologue saves. */ +/* Generate and emit a sequence of insns equivalent to PUSH, but using + STR and STRD. If an even number of registers are being pushed, one + or more STRD patterns are created for each register pair. If an + odd number of registers are pushed, emit an initial STR followed by + as many STRD instructions as are needed. This works best when the + stack is initially 64-bit aligned (the normal case), since it + ensures that each STRD is also 64-bit aligned. */ static void thumb2_emit_strd_push (unsigned long saved_regs_mask) { int num_regs = 0; - int i, j; + int i; + int regno; rtx par = NULL_RTX; - rtx insn = NULL_RTX; rtx dwarf = NULL_RTX; - rtx tmp, reg, tmp1; - - for (i = 0; i <= LAST_ARM_REGNUM; i++) - if (saved_regs_mask & (1 << i)) - num_regs++; + rtx tmp; + bool first = true; - gcc_assert (num_regs && num_regs <= 16); + num_regs = bit_count (saved_regs_mask); - /* Pre-decrement the stack pointer, based on there being num_regs 4-byte - registers to push. 
*/ - tmp = gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); - RTX_FRAME_RELATED_P (tmp) = 1; - insn = emit_insn (tmp); + /* Must be at least one register to save, and can't save SP or PC. */ + gcc_assert (num_regs > 0 && num_regs <= 14); + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); - /* Create sequence for DWARF info. */ + /* Create sequence for DWARF info. All the frame-related data for + debugging is held in this wrapper. */ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); - /* RTLs cannot be shared, hence create new copy for dwarf. */ - tmp1 = gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); - RTX_FRAME_RELATED_P (tmp1) = 1; - XVECEXP (dwarf, 0, 0) = tmp1; + /* Describe the stack adjustment. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; - gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); - gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + /* Find the first register. */ + for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++) + ; - /* Var j iterates over all the registers to gather all the registers in - saved_regs_mask. Var i gives index of register R_j in stack frame. - A PARALLEL RTX of register-pair is created here, so that pattern for - STRD can be matched. If num_regs is odd, 1st register will be pushed - using STR and remaining registers will be pushed with STRD in pairs. - If num_regs is even, all registers are pushed with STRD in pairs. - Hence, skip first element for odd num_regs. */ - for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--) - if (saved_regs_mask & (1 << j)) - { - /* Create RTX for store. New RTX is created for dwarf as - they are not sharable. */ - reg = gen_rtx_REG (SImode, j); - tmp = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); + i = 0; - tmp1 = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); - RTX_FRAME_RELATED_P (tmp) = 1; - RTX_FRAME_RELATED_P (tmp1) = 1; - - if (((i - (num_regs % 2)) % 2) == 1) - /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to - be created. Hence create it first. The STRD pattern we are - generating is : - [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1)) - (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ] - where the target registers need not be consecutive. */ - par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + /* If there's an odd number of registers to push. Start off by + pushing a single register. This ensures that subsequent strd + operations are dword aligned (assuming that SP was originally + 64-bit aligned). */ + if ((num_regs & 1) != 0) + { + rtx reg, mem, insn; - /* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is - even, the reg_j is added as 0th element and if it is odd, reg_i is - added as 1st element of STRD pattern shown above. 
*/ - XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp; - XVECEXP (dwarf, 0, (i + 1)) = tmp1; + reg = gen_rtx_REG (SImode, regno); + if (num_regs == 1) + mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)); + else + mem = gen_frame_mem (Pmode, + gen_rtx_PRE_MODIFY + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * num_regs))); - if (((i - (num_regs % 2)) % 2) == 0) - /* When (i - (num_regs % 2)) is even, RTXs for both the registers - to be loaded are generated in above given STRD pattern, and the - pattern can be emitted now. */ - emit_insn (par); + tmp = gen_rtx_SET (VOIDmode, mem, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + insn = emit_insn (tmp); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + i++; + regno++; + XVECEXP (dwarf, 0, i) = tmp; + first = false; + } - i--; - } + while (i < num_regs) + if (saved_regs_mask & (1 << regno)) + { + rtx reg1, reg2, mem1, mem2; + rtx tmp0, tmp1, tmp2; + int regno2; - if ((num_regs % 2) == 1) - { - /* If odd number of registers are pushed, generate STR pattern to store - lone register. */ - for (; (saved_regs_mask & (1 << j)) == 0; j--); + /* Find the register to pair with this one. */ + for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0; + regno2++) + ; - tmp1 = gen_frame_mem (SImode, plus_constant (Pmode, - stack_pointer_rtx, 4 * i)); - reg = gen_rtx_REG (SImode, j); - tmp = gen_rtx_SET (SImode, tmp1, reg); - RTX_FRAME_RELATED_P (tmp) = 1; + reg1 = gen_rtx_REG (SImode, regno); + reg2 = gen_rtx_REG (SImode, regno2); - emit_insn (tmp); + if (first) + { + rtx insn; + + first = false; + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * num_regs)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * (num_regs - 1))); + tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * (num_regs))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp0) = 1; + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3)); + XVECEXP (par, 0, 0) = tmp0; + XVECEXP (par, 0, 1) = tmp1; + XVECEXP (par, 0, 2) = tmp2; + insn = emit_insn (par); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = tmp1; + XVECEXP (par, 0, 1) = tmp2; + emit_insn (par); + } - tmp1 = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); - RTX_FRAME_RELATED_P (tmp1) = 1; - XVECEXP (dwarf, 0, (i + 1)) = tmp1; - } + /* Create unwind information. This is an approximation. 
*/ + tmp1 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)), + reg1); + tmp2 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))), + reg2); + + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp1; + XVECEXP (dwarf, 0, i + 2) = tmp2; + i += 2; + regno = regno2 + 1; + } + else + regno++; - add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); - RTX_FRAME_RELATED_P (insn) = 1; return; } @@ -25394,163 +25466,6 @@ arm_setup_incoming_varargs (cumulative_args_t pcum_v, *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; } -/* Return nonzero if the CONSUMER instruction (a store) does not need - PRODUCER's value to calculate the address. */ - -int -arm_no_early_store_addr_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx addr = PATTERN (consumer); - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (addr) == COND_EXEC) - addr = COND_EXEC_CODE (addr); - if (GET_CODE (addr) == PARALLEL) - addr = XVECEXP (addr, 0, 0); - addr = XEXP (addr, 0); - - return !reg_overlap_mentioned_p (value, addr); -} - -/* Return nonzero if the CONSUMER instruction (a store) does need - PRODUCER's value to calculate the address. */ - -int -arm_early_store_addr_dep (rtx producer, rtx consumer) -{ - return !arm_no_early_store_addr_dep (producer, consumer); -} - -/* Return nonzero if the CONSUMER instruction (a load) does need - PRODUCER's value to calculate the address. */ - -int -arm_early_load_addr_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx addr = PATTERN (consumer); - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (addr) == COND_EXEC) - addr = COND_EXEC_CODE (addr); - if (GET_CODE (addr) == PARALLEL) - { - if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN) - addr = XVECEXP (addr, 0, 1); - else - addr = XVECEXP (addr, 0, 0); - } - addr = XEXP (addr, 1); - - return reg_overlap_mentioned_p (value, addr); -} - -/* Return nonzero if the CONSUMER instruction (an ALU op) does not - have an early register shift value or amount dependency on the - result of PRODUCER. */ - -int -arm_no_early_alu_shift_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx op = PATTERN (consumer); - rtx early_op; - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (op) == COND_EXEC) - op = COND_EXEC_CODE (op); - if (GET_CODE (op) == PARALLEL) - op = XVECEXP (op, 0, 0); - op = XEXP (op, 1); - - early_op = XEXP (op, 0); - /* This is either an actual independent shift, or a shift applied to - the first operand of another operation. We want the whole shift - operation. */ - if (REG_P (early_op)) - early_op = op; - - return !reg_overlap_mentioned_p (value, early_op); -} - -/* Return nonzero if the CONSUMER instruction (an ALU op) does not - have an early register shift value dependency on the result of - PRODUCER. 
*/ - -int -arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx op = PATTERN (consumer); - rtx early_op; - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (op) == COND_EXEC) - op = COND_EXEC_CODE (op); - if (GET_CODE (op) == PARALLEL) - op = XVECEXP (op, 0, 0); - op = XEXP (op, 1); - - early_op = XEXP (op, 0); - - /* This is either an actual independent shift, or a shift applied to - the first operand of another operation. We want the value being - shifted, in either case. */ - if (!REG_P (early_op)) - early_op = XEXP (early_op, 0); - - return !reg_overlap_mentioned_p (value, early_op); -} - -/* Return nonzero if the CONSUMER (a mul or mac op) does not - have an early register mult dependency on the result of - PRODUCER. */ - -int -arm_no_early_mul_dep (rtx producer, rtx consumer) -{ - rtx value = PATTERN (producer); - rtx op = PATTERN (consumer); - - if (GET_CODE (value) == COND_EXEC) - value = COND_EXEC_CODE (value); - if (GET_CODE (value) == PARALLEL) - value = XVECEXP (value, 0, 0); - value = XEXP (value, 0); - if (GET_CODE (op) == COND_EXEC) - op = COND_EXEC_CODE (op); - if (GET_CODE (op) == PARALLEL) - op = XVECEXP (op, 0, 0); - op = XEXP (op, 1); - - if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) - { - if (GET_CODE (XEXP (op, 0)) == MULT) - return !reg_overlap_mentioned_p (value, XEXP (op, 0)); - else - return !reg_overlap_mentioned_p (value, XEXP (op, 1)); - } - - return 0; -} - /* We can't rely on the caller doing the proper promotion when using APCS or ATPCS. */ @@ -25600,95 +25515,6 @@ arm_cxx_guard_type (void) return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; } -/* Return non-zero iff the consumer (a multiply-accumulate or a - multiple-subtract instruction) has an accumulator dependency on the - result of the producer and no other dependency on that result. It - does not check if the producer is multiply-accumulate instruction. */ -int -arm_mac_accumulator_is_result (rtx producer, rtx consumer) -{ - rtx result; - rtx op0, op1, acc; - - producer = PATTERN (producer); - consumer = PATTERN (consumer); - - if (GET_CODE (producer) == COND_EXEC) - producer = COND_EXEC_CODE (producer); - if (GET_CODE (consumer) == COND_EXEC) - consumer = COND_EXEC_CODE (consumer); - - if (GET_CODE (producer) != SET) - return 0; - - result = XEXP (producer, 0); - - if (GET_CODE (consumer) != SET) - return 0; - - /* Check that the consumer is of the form - (set (...) (plus (mult ...) (...))) - or - (set (...) (minus (...) (mult ...))). 
*/ - if (GET_CODE (XEXP (consumer, 1)) == PLUS) - { - if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) - return 0; - - op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); - op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); - acc = XEXP (XEXP (consumer, 1), 1); - } - else if (GET_CODE (XEXP (consumer, 1)) == MINUS) - { - if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) - return 0; - - op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); - op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); - acc = XEXP (XEXP (consumer, 1), 0); - } - else - return 0; - - return (reg_overlap_mentioned_p (result, acc) - && !reg_overlap_mentioned_p (result, op0) - && !reg_overlap_mentioned_p (result, op1)); -} - -/* Return non-zero if the consumer (a multiply-accumulate instruction) - has an accumulator dependency on the result of the producer (a - multiplication instruction) and no other dependency on that result. */ -int -arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) -{ - rtx mul = PATTERN (producer); - rtx mac = PATTERN (consumer); - rtx mul_result; - rtx mac_op0, mac_op1, mac_acc; - - if (GET_CODE (mul) == COND_EXEC) - mul = COND_EXEC_CODE (mul); - if (GET_CODE (mac) == COND_EXEC) - mac = COND_EXEC_CODE (mac); - - /* Check that mul is of the form (set (...) (mult ...)) - and mla is of the form (set (...) (plus (mult ...) (...))). */ - if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) - || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS - || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) - return 0; - - mul_result = XEXP (mul, 0); - mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); - mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); - mac_acc = XEXP (XEXP (mac, 1), 1); - - return (reg_overlap_mentioned_p (mul_result, mac_acc) - && !reg_overlap_mentioned_p (mul_result, mac_op0) - && !reg_overlap_mentioned_p (mul_result, mac_op1)); -} - /* The EABI says test the least significant bit of a guard variable. */ diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 387d2717431..1781b75b34b 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -645,6 +645,8 @@ extern int prefer_neon_for_64bits; #define BIGGEST_ALIGNMENT (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32) +#define MALLOC_ABI_ALIGNMENT BIGGEST_ALIGNMENT + /* XXX Blah -- this macro is used directly by libobjc. Since it supports no vector modes, cut out the complexity and fall back on BIGGEST_FIELD_ALIGNMENT. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index fceb04c1272..8a482b570ec 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -245,411 +245,13 @@ (set_attr "length" "4") (set_attr "pool_range" "250")]) -; TYPE attribute is used to classify instructions for use in scheduling. -; -; Instruction classification: -; -; arlo_imm any arithmetic or logical instruction that doesn't have -; a shifted operand and has an immediate operand. This -; excludes MOV, MVN and RSB(S) immediate. -; arlo_reg any arithmetic or logical instruction that doesn't have -; a shifted or an immediate operand. This excludes -; MOV and MVN but includes MOVT. This is also the default. -; arlo_shift any arithmetic or logical instruction that has a source -; operand shifted by a constant. This excludes -; simple shifts. -; arlo_shift_reg as arlo_shift, with the shift amount specified in a -; register. -; block blockage insn, this blocks all functional units. -; branch branch. -; call subroutine call. -; clz count leading zeros (CLZ). -; extend extend instruction (SXTB, SXTH, UXTB, UXTH). 
-; f_2_r transfer from float to core (no memory needed). -; f_cvt conversion between float and integral. -; f_flag transfer of co-processor flags to the CPSR. -; f_load[d,s] double/single load from memory. Used for VFP unit. -; f_minmax[d,s] double/single floating point minimum/maximum. -; f_rint[d,s] double/single floating point rount to integral. -; f_sel[d,s] double/single floating byte select. -; f_store[d,s] double/single store to memory. Used for VFP unit. -; fadd[d,s] double/single floating-point scalar addition. -; fcmp[d,s] double/single floating-point compare. -; fconst[d,s] double/single load immediate. -; fcpys single precision floating point cpy. -; fdiv[d,s] double/single precision floating point division. -; ffarith[d,s] double/single floating point abs/neg/cpy. -; ffma[d,s] double/single floating point fused multiply-accumulate. -; float floating point arithmetic operation. -; fmac[d,s] double/single floating point multiply-accumulate. -; fmul[d,s] double/single floating point multiply. -; load_byte load byte(s) from memory to arm registers. -; load1 load 1 word from memory to arm registers. -; load2 load 2 words from memory to arm registers. -; load3 load 3 words from memory to arm registers. -; load4 load 4 words from memory to arm registers. -; mla integer multiply accumulate. -; mlas integer multiply accumulate, flag setting. -; mov_imm simple MOV instruction that moves an immediate to -; register. This includes MOVW, but not MOVT. -; mov_reg simple MOV instruction that moves a register to another -; register. This includes MOVW, but not MOVT. -; mov_shift simple MOV instruction, shifted operand by a constant. -; mov_shift_reg simple MOV instruction, shifted operand by a register. -; mul integer multiply. -; muls integer multiply, flag setting. -; mvn_imm inverting move instruction, immediate. -; mvn_reg inverting move instruction, register. -; mvn_shift inverting move instruction, shifted operand by a constant. -; mvn_shift_reg inverting move instruction, shifted operand by a register. -; r_2_f transfer from core to float. -; sdiv signed division. -; shift simple shift operation (LSL, LSR, ASR, ROR) with an -; immediate. -; shift_reg simple shift by a register. -; smlad signed multiply accumulate dual. -; smladx signed multiply accumulate dual reverse. -; smlal signed multiply accumulate long. -; smlald signed multiply accumulate long dual. -; smlals signed multiply accumulate long, flag setting. -; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. -; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. -; smlawy signed multiply accumulate wide, 32x16-bit, -; 32-bit accumulate. -; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. -; smlsd signed multiply subtract dual. -; smlsdx signed multiply subtract dual reverse. -; smlsld signed multiply subtract long dual. -; smmla signed most significant word multiply accumulate. -; smmul signed most significant word multiply. -; smmulr signed most significant word multiply, rounded. -; smuad signed dual multiply add. -; smuadx signed dual multiply add reverse. -; smull signed multiply long. -; smulls signed multiply long, flag setting. -; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. -; smulxy signed multiply, 16x16-bit, 32-bit accumulate. -; smusd signed dual multiply subtract. -; smusdx signed dual multiply subtract reverse. -; store1 store 1 word to memory from arm registers. -; store2 store 2 words to memory from arm registers. 
-; store3 store 3 words to memory from arm registers. -; store4 store 4 (or more) words to memory from arm registers. -; udiv unsigned division. -; umaal unsigned multiply accumulate accumulate long. -; umlal unsigned multiply accumulate long. -; umlals unsigned multiply accumulate long, flag setting. -; umull unsigned multiply long. -; umulls unsigned multiply long, flag setting. -; -; The classification below is for instructions used by the Wireless MMX -; Technology. Each attribute value is used to classify an instruction of the -; same name or family. -; -; wmmx_tandc -; wmmx_tbcst -; wmmx_textrc -; wmmx_textrm -; wmmx_tinsr -; wmmx_tmcr -; wmmx_tmcrr -; wmmx_tmia -; wmmx_tmiaph -; wmmx_tmiaxy -; wmmx_tmrc -; wmmx_tmrrc -; wmmx_tmovmsk -; wmmx_torc -; wmmx_torvsc -; wmmx_wabs -; wmmx_wdiff -; wmmx_wacc -; wmmx_wadd -; wmmx_waddbhus -; wmmx_waddsubhx -; wmmx_waligni -; wmmx_walignr -; wmmx_wand -; wmmx_wandn -; wmmx_wavg2 -; wmmx_wavg4 -; wmmx_wcmpeq -; wmmx_wcmpgt -; wmmx_wmac -; wmmx_wmadd -; wmmx_wmax -; wmmx_wmerge -; wmmx_wmiawxy -; wmmx_wmiaxy -; wmmx_wmin -; wmmx_wmov -; wmmx_wmul -; wmmx_wmulw -; wmmx_wldr -; wmmx_wor -; wmmx_wpack -; wmmx_wqmiaxy -; wmmx_wqmulm -; wmmx_wqmulwm -; wmmx_wror -; wmmx_wsad -; wmmx_wshufh -; wmmx_wsll -; wmmx_wsra -; wmmx_wsrl -; wmmx_wstr -; wmmx_wsub -; wmmx_wsubaddhx -; wmmx_wunpckeh -; wmmx_wunpckel -; wmmx_wunpckih -; wmmx_wunpckil -; wmmx_wxor - -(define_attr "type" - "arlo_imm,\ - arlo_reg,\ - arlo_shift,\ - arlo_shift_reg,\ - block,\ - branch,\ - call,\ - clz,\ - extend,\ - f_2_r,\ - f_cvt,\ - f_flag,\ - f_loadd,\ - f_loads,\ - f_minmaxd,\ - f_minmaxs,\ - f_rintd,\ - f_rints,\ - f_seld,\ - f_sels,\ - f_stored,\ - f_stores,\ - faddd,\ - fadds,\ - fcmpd,\ - fcmps,\ - fconstd,\ - fconsts,\ - fcpys,\ - fdivd,\ - fdivs,\ - ffarithd,\ - ffariths,\ - ffmad,\ - ffmas,\ - float,\ - fmacd,\ - fmacs,\ - fmuld,\ - fmuls,\ - load_byte,\ - load1,\ - load2,\ - load3,\ - load4,\ - mla,\ - mlas,\ - mov_imm,\ - mov_reg,\ - mov_shift,\ - mov_shift_reg,\ - mul,\ - muls,\ - mvn_imm,\ - mvn_reg,\ - mvn_shift,\ - mvn_shift_reg,\ - r_2_f,\ - sdiv,\ - shift,\ - shift_reg,\ - smlad,\ - smladx,\ - smlal,\ - smlald,\ - smlals,\ - smlalxy,\ - smlawx,\ - smlawy,\ - smlaxy,\ - smlsd,\ - smlsdx,\ - smlsld,\ - smmla,\ - smmul,\ - smmulr,\ - smuad,\ - smuadx,\ - smull,\ - smulls,\ - smulwy,\ - smulxy,\ - smusd,\ - smusdx,\ - store1,\ - store2,\ - store3,\ - store4,\ - udiv,\ - umaal,\ - umlal,\ - umlals,\ - umull,\ - umulls,\ - wmmx_tandc,\ - wmmx_tbcst,\ - wmmx_textrc,\ - wmmx_textrm,\ - wmmx_tinsr,\ - wmmx_tmcr,\ - wmmx_tmcrr,\ - wmmx_tmia,\ - wmmx_tmiaph,\ - wmmx_tmiaxy,\ - wmmx_tmrc,\ - wmmx_tmrrc,\ - wmmx_tmovmsk,\ - wmmx_torc,\ - wmmx_torvsc,\ - wmmx_wabs,\ - wmmx_wabsdiff,\ - wmmx_wacc,\ - wmmx_wadd,\ - wmmx_waddbhus,\ - wmmx_waddsubhx,\ - wmmx_waligni,\ - wmmx_walignr,\ - wmmx_wand,\ - wmmx_wandn,\ - wmmx_wavg2,\ - wmmx_wavg4,\ - wmmx_wcmpeq,\ - wmmx_wcmpgt,\ - wmmx_wmac,\ - wmmx_wmadd,\ - wmmx_wmax,\ - wmmx_wmerge,\ - wmmx_wmiawxy,\ - wmmx_wmiaxy,\ - wmmx_wmin,\ - wmmx_wmov,\ - wmmx_wmul,\ - wmmx_wmulw,\ - wmmx_wldr,\ - wmmx_wor,\ - wmmx_wpack,\ - wmmx_wqmiaxy,\ - wmmx_wqmulm,\ - wmmx_wqmulwm,\ - wmmx_wror,\ - wmmx_wsad,\ - wmmx_wshufh,\ - wmmx_wsll,\ - wmmx_wsra,\ - wmmx_wsrl,\ - wmmx_wstr,\ - wmmx_wsub,\ - wmmx_wsubaddhx,\ - wmmx_wunpckeh,\ - wmmx_wunpckel,\ - wmmx_wunpckih,\ - wmmx_wunpckil,\ - wmmx_wxor" - (const_string "arlo_reg")) - -; Is this an (integer side) multiply with a 32-bit (or smaller) result? 
-(define_attr "mul32" "no,yes" - (if_then_else - (eq_attr "type" - "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ - smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") - (const_string "yes") - (const_string "no"))) - -; Is this an (integer side) multiply with a 64-bit result? -(define_attr "mul64" "no,yes" - (if_then_else - (eq_attr "type" - "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") - (const_string "yes") - (const_string "no"))) +;; Instruction classification types +(include "types.md") ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) -;; Classification of NEON instructions for scheduling purposes. -(define_attr "neon_type" - "neon_int_1,\ - neon_int_2,\ - neon_int_3,\ - neon_int_4,\ - neon_int_5,\ - neon_vqneg_vqabs,\ - neon_vmov,\ - neon_vaba,\ - neon_vsma,\ - neon_vaba_qqq,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ - neon_mla_qqq_32_qqd_32_scalar,\ - neon_mul_ddd_16_scalar_32_16_long_scalar,\ - neon_mul_qqd_32_scalar,\ - neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ - neon_shift_1,\ - neon_shift_2,\ - neon_shift_3,\ - neon_vshl_ddd,\ - neon_vqshl_vrshl_vqrshl_qqq,\ - neon_vsra_vrsra,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vsum,\ - neon_fp_vmul_ddd,\ - neon_fp_vmul_qqd,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vmla_ddd_scalar,\ - neon_fp_vmla_qqq_scalar,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq,\ - neon_bp_simple,\ - neon_bp_2cycle,\ - neon_bp_3cycle,\ - neon_ldr,\ - neon_str,\ - neon_vld1_1_2_regs,\ - neon_vld1_3_4_regs,\ - neon_vld2_2_regs_vld1_vld2_all_lanes,\ - neon_vld2_4_regs,\ - neon_vld3_vld4,\ - neon_vst1_1_2_regs_vst2_2_regs,\ - neon_vst1_3_4_regs,\ - neon_vst2_4_regs_vst3_vst4,\ - neon_vst3_vst4,\ - neon_vld1_vld2_lane,\ - neon_vld3_vld4_lane,\ - neon_vst1_vst2_lane,\ - neon_vst3_vst4_lane,\ - neon_vld3_vld4_all_lanes,\ - neon_mcr,\ - neon_mcr_2_mcrr,\ - neon_mrc,\ - neon_mrrc,\ - neon_ldm_2,\ - neon_stm_2,\ - none" - (const_string "none")) - ; condition codes: this one is used by final_prescan_insn to speed up ; conditionalizing instructions. It saves having to scan the rtl to see if ; it uses or alters the condition codes. 
@@ -675,9 +277,34 @@ (ior (eq_attr "is_thumb1" "yes") (eq_attr "type" "call")) (const_string "clob") - (if_then_else (eq_attr "neon_type" "none") - (const_string "nocond") - (const_string "unconditional")))) + (if_then_else (eq_attr "type" + "!neon_int_1, neon_int_2, neon_int_3, neon_int_4, neon_int_5,\ + neon_vqneg_vqabs, neon_vmov, neon_vaba, neon_vsma, neon_vaba_qqq,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar, neon_mul_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar, neon_shift_1,\ + neon_shift_2, neon_shift_3, neon_vshl_ddd,\ + neon_vqshl_vrshl_vqrshl_qqq, neon_vsra_vrsra,\ + neon_fp_vadd_ddd_vabs_dd, neon_fp_vadd_qqq_vabs_qq, neon_fp_vsum,\ + neon_fp_vmul_ddd, neon_fp_vmul_qqd, neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq, neon_fp_vmla_ddd_scalar, neon_fp_vmla_qqq_scalar,\ + neon_fp_vrecps_vrsqrts_ddd, neon_fp_vrecps_vrsqrts_qqq,\ + neon_bp_simple, neon_bp_2cycle, neon_bp_3cycle, neon_ldr, neon_str,\ + neon_vld1_1_2_regs, neon_vld1_3_4_regs,\ + neon_vld2_2_regs_vld1_vld2_all_lanes, neon_vld2_4_regs,\ + neon_vld3_vld4, neon_vst1_1_2_regs_vst2_2_regs, neon_vst1_3_4_regs,\ + neon_vst2_4_regs_vst3_vst4, neon_vst3_vst4, neon_vld1_vld2_lane,\ + neon_vld3_vld4_lane, neon_vst1_vst2_lane, neon_vst3_vst4_lane,\ + neon_vld3_vld4_all_lanes, neon_mcr, neon_mcr_2_mcrr, neon_mrc,\ + neon_mrrc, neon_ldm_2, neon_stm_2") + (const_string "nocond") + (const_string "unconditional")))) ; Predicable means that the insn can be conditionally executed based on ; an automatically added predicate (additional patterns are generated by @@ -703,8 +330,11 @@ ; than one on the main cpu execution unit. 
(define_attr "core_cycles" "single,multi" (if_then_else (eq_attr "type" - "arlo_imm, arlo_reg,\ - extend, shift, arlo_shift, float, fdivd, fdivs,\ + "adc_imm, adc_reg, adcs_imm, adcs_reg, adr, alu_ext, alu_imm, alu_reg,\ + alu_shift_imm, alu_shift_reg, alus_ext, alus_imm, alus_reg,\ + alus_shift_imm, alus_shift_reg, bfm, csel, rev, logic_imm, logic_reg,\ + logic_shift_imm, logic_shift_reg, logics_imm, logics_reg,\ + logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel,\ wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\ wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\ wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\ @@ -830,7 +460,8 @@ ] "TARGET_THUMB1" "add\\t%Q0, %Q0, %Q2\;adc\\t%R0, %R0, %R2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_insn_and_split "*arm_adddi3" @@ -858,7 +489,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*adddi_sesidi_di" @@ -887,7 +519,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*adddi_zesidi_di" @@ -914,7 +547,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "addsi3" @@ -989,8 +623,8 @@ (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no") (set_attr "arch" "t2,t2,t2,t2,*,*,*,t2,t2,*,*,a,t2,t2,*") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "arlo_imm") - (const_string "arlo_reg"))) + (const_string "alu_imm") + (const_string "alu_reg"))) ] ) @@ -1040,7 +674,9 @@ operands[3] = GEN_INT (offset); operands[2] = GEN_INT (INTVAL (operands[2]) - offset); } - [(set_attr "length" "2,2,2,2,2,2,2,4,4,4")] + [(set_attr "length" "2,2,2,2,2,2,2,4,4,4") + (set_attr "type" "alus_imm,alus_imm,alus_reg,alus_reg,alus_reg, + alus_reg,alus_reg,multiple,multiple,multiple")] ) ;; Reloading and elimination of the frame pointer can @@ -1071,7 +707,7 @@ sub%.\\t%0, %1, #%n2 add%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*addsi3_compare0_scratch" @@ -1087,8 +723,7 @@ cmn%?\\t%0, %1" [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "type" "arlo_imm,arlo_imm,*") - ] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*compare_negsi_si" @@ -1102,7 +737,8 @@ (set_attr "predicable" "yes") (set_attr "arch" "t2,*") (set_attr "length" "2,4") - (set_attr "predicable_short_it" "yes,no")] + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alus_reg")] ) ;; This is the canonicalization of addsi3_compare0_for_combiner when the @@ -1119,7 +755,8 @@ "@ add%.\\t%0, %1, %3 sub%.\\t%0, %1, #%n3" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "alus_reg")] ) ;; Convert the sequence @@ -1177,7 +814,7 @@ sub%.\\t%0, %1, #%n2 add%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*addsi3_compare_op2" @@ -1194,7 +831,7 @@ add%.\\t%0, %1, %2 sub%.\\t%0, %1, #%n2" [(set_attr "conds" "set") - (set_attr "type" 
"arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_imm,alus_reg")] ) (define_insn "*compare_addsi2_op0" @@ -1215,7 +852,7 @@ (set_attr "arch" "t2,t2,*,*,*") (set_attr "predicable_short_it" "yes,yes,no,no,no") (set_attr "length" "2,2,4,4,4") - (set_attr "type" "arlo_imm,*,arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] ) (define_insn "*compare_addsi2_op1" @@ -1236,8 +873,7 @@ (set_attr "arch" "t2,t2,*,*,*") (set_attr "predicable_short_it" "yes,yes,no,no,no") (set_attr "length" "2,2,4,4,4") - (set_attr "type" - "arlo_imm,*,arlo_imm,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] ) (define_insn "*addsi3_carryin_<optab>" @@ -1254,7 +890,8 @@ (set_attr "predicable" "yes") (set_attr "arch" "t2,*,*") (set_attr "length" "4") - (set_attr "predicable_short_it" "yes,no,no")] + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" "adc_reg,adc_reg,adc_imm")] ) (define_insn "*addsi3_carryin_alt2_<optab>" @@ -1271,7 +908,8 @@ (set_attr "predicable" "yes") (set_attr "arch" "t2,*,*") (set_attr "length" "4") - (set_attr "predicable_short_it" "yes,no,no")] + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" "adc_reg,adc_reg,adc_imm")] ) (define_insn "*addsi3_carryin_shift_<optab>" @@ -1288,8 +926,8 @@ (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) (define_insn "*addsi3_carryin_clobercc_<optab>" @@ -1300,7 +938,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT" "adc%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] ) (define_insn "*subsi3_carryin" @@ -1315,7 +954,8 @@ [(set_attr "conds" "use") (set_attr "arch" "*,a") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "adc_reg,adc_imm")] ) (define_insn "*subsi3_carryin_const" @@ -1325,7 +965,8 @@ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] "TARGET_32BIT" "sbc\\t%0, %1, #%B2" - [(set_attr "conds" "use")] + [(set_attr "conds" "use") + (set_attr "type" "adc_imm")] ) (define_insn "*subsi3_carryin_compare" @@ -1338,7 +979,8 @@ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] "TARGET_32BIT" "sbcs\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] ) (define_insn "*subsi3_carryin_compare_const" @@ -1351,7 +993,8 @@ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] "TARGET_32BIT" "sbcs\\t%0, %1, #%B2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "adcs_imm")] ) (define_insn "*subsi3_carryin_shift" @@ -1367,8 +1010,8 @@ [(set_attr "conds" "use") (set_attr "predicable" "yes") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) (define_insn "*rsbsi3_carryin_shift" @@ -1384,8 +1027,8 @@ [(set_attr "conds" "use") (set_attr "predicable" "yes") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) ; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant. 
@@ -1458,7 +1101,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*thumb_subdi3" @@ -1468,7 +1112,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB1" "sub\\t%Q0, %Q0, %Q2\;sbc\\t%R0, %R0, %R2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_di_zesidi" @@ -1493,7 +1138,8 @@ operands[5] = GEN_INT (~0); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_di_sesidi" @@ -1519,7 +1165,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_zesidi_di" @@ -1545,7 +1192,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_sesidi_di" @@ -1574,7 +1222,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*subdi_zesidi_zesidi" @@ -1597,7 +1246,8 @@ operands[0] = gen_lowpart (SImode, operands[0]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "subsi3" @@ -1628,7 +1278,9 @@ "TARGET_THUMB1" "sub\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "conds" "set")]) + (set_attr "conds" "set") + (set_attr "type" "alus_reg")] +) ; ??? Check Thumb-2 split length (define_insn_and_split "*arm_subsi3_insn" @@ -1658,7 +1310,7 @@ (set_attr "arch" "t2,t2,t2,t2,*,*,*,*,*") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no") - (set_attr "type" "*,*,*,*,arlo_imm,arlo_imm,*,*,arlo_imm")] + (set_attr "type" "alu_reg,alu_reg,alu_reg,alu_reg,alu_imm,alu_imm,alu_reg,alu_reg,multiple")] ) (define_peephole2 @@ -1688,7 +1340,7 @@ sub%.\\t%0, %1, %2 rsb%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*,*")] + (set_attr "type" "alus_imm,alus_reg,alus_reg")] ) (define_insn "subsi3_compare" @@ -1703,7 +1355,7 @@ sub%.\\t%0, %1, %2 rsb%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*,*")] + (set_attr "type" "alus_imm,alus_reg,alus_reg")] ) (define_expand "subsf3" @@ -1725,6 +1377,20 @@ ;; Multiplication insns +(define_expand "mulhi3" + [(set (match_operand:HI 0 "s_register_operand" "") + (mult:HI (match_operand:HI 1 "s_register_operand" "") + (match_operand:HI 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY" + " + { + rtx result = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], gen_lowpart (HImode, result)); + DONE; + }" +) + (define_expand "mulsi3" [(set (match_operand:SI 0 "s_register_operand" "") (mult:SI (match_operand:SI 2 "s_register_operand" "") @@ -2496,7 +2162,7 @@ gen_highpart_mode (SImode, DImode, operands[2])); }" - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + [(set_attr "type" "neon_int_1,neon_int_1,multiple,multiple,multiple,multiple,neon_int_1,neon_int_1") (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, avoid_neon_for_64bits,avoid_neon_for_64bits") (set_attr "length" "*,*,8,8,8,8,*,*") @@ -2521,7 +2187,8 @@ 
operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); }" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*anddi_sesdi_di" @@ -2531,7 +2198,8 @@ (match_operand:DI 1 "s_register_operand" "0,r")))] "TARGET_32BIT" "#" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "andsi3" @@ -2638,8 +2306,7 @@ [(set_attr "length" "4,4,4,4,16") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no,yes,no,no,no") - (set_attr "type" - "arlo_imm,arlo_imm,*,*,arlo_imm")] + (set_attr "type" "logic_imm,logic_imm,logic_reg,logic_reg,logic_imm")] ) (define_insn "*thumb1_andsi3_insn" @@ -2649,7 +2316,7 @@ "TARGET_THUMB1" "and\\t%0, %2" [(set_attr "length" "2") - (set_attr "type" "arlo_imm") + (set_attr "type" "logic_imm") (set_attr "conds" "set")]) (define_insn "*andsi3_compare0" @@ -2666,7 +2333,7 @@ bic%.\\t%0, %1, #%B2 and%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "logics_imm,logics_imm,logics_reg")] ) (define_insn "*andsi3_compare0_scratch" @@ -2682,7 +2349,7 @@ bic%.\\t%2, %0, #%B1 tst%?\\t%0, %1" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm,*")] + (set_attr "type" "logics_imm,logics_imm,logics_reg")] ) (define_insn "*zeroextractsi_compare0_scratch" @@ -2706,7 +2373,7 @@ [(set_attr "conds" "set") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "arlo_imm")] + (set_attr "type" "logics_imm")] ) (define_insn_and_split "*ne_zeroextractsi" @@ -2743,7 +2410,8 @@ (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 12) - (const_int 8)))] + (const_int 8))) + (set_attr "type" "multiple")] ) (define_insn_and_split "*ne_zeroextractsi_shifted" @@ -2768,7 +2436,8 @@ operands[2] = GEN_INT (32 - INTVAL (operands[2])); " [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*ite_ne_zeroextractsi" @@ -2806,7 +2475,8 @@ << INTVAL (operands[3])); " [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*ite_ne_zeroextractsi_shifted" @@ -2833,7 +2503,8 @@ operands[2] = GEN_INT (32 - INTVAL (operands[2])); " [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_split @@ -3134,7 +2805,8 @@ "bfc%?\t%0, %2, %1" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) (define_insn "insv_t2" @@ -3146,7 +2818,8 @@ "bfi%?\t%0, %3, %2, %1" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) ; constants for op 2 will never be given to these patterns. 
@@ -3171,7 +2844,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); }" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] ) (define_insn_and_split "*anddi_notzesidi_di" @@ -3199,7 +2873,8 @@ }" [(set_attr "length" "4,8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] ) (define_insn_and_split "*anddi_notsesidi_di" @@ -3223,7 +2898,8 @@ }" [(set_attr "length" "8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] ) (define_insn "andsi_notsi_si" @@ -3233,7 +2909,8 @@ "TARGET_32BIT" "bic%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] ) (define_insn "thumb1_bicsi3" @@ -3243,7 +2920,9 @@ "TARGET_THUMB1" "bic\\t%0, %1" [(set_attr "length" "2") - (set_attr "conds" "set")]) + (set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) (define_insn "andsi_not_shiftsi_si" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -3256,8 +2935,8 @@ [(set_attr "predicable" "yes") (set_attr "shift" "2") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "logic_shift_imm") + (const_string "logic_shift_reg")))] ) (define_insn "*andsi_notsi_si_compare0" @@ -3270,7 +2949,8 @@ (and:SI (not:SI (match_dup 2)) (match_dup 1)))] "TARGET_32BIT" "bic%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] ) (define_insn "*andsi_notsi_si_compare0_scratch" @@ -3282,7 +2962,8 @@ (clobber (match_scratch:SI 0 "=r"))] "TARGET_32BIT" "bic%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] ) (define_expand "iordi3" @@ -3331,7 +3012,7 @@ gen_highpart_mode (SImode, DImode, operands[2])); }" - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + [(set_attr "type" "neon_int_1,neon_int_1,multiple,multiple,multiple,multiple,neon_int_1,neon_int_1") (set_attr "length" "*,*,8,8,8,8,*,*") (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) @@ -3347,7 +3028,8 @@ #" [(set_attr "length" "4,8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg,multiple")] ) (define_insn "*iordi_sesidi_di" @@ -3358,7 +3040,8 @@ "TARGET_32BIT" "#" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] ) (define_expand "iorsi3" @@ -3416,7 +3099,7 @@ (set_attr "arch" "32,t2,t2,32,32") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no,yes,no,no,no") - (set_attr "type" "arlo_imm,*,arlo_imm,*,*")] + (set_attr "type" "logic_imm,logic_reg,logic_imm,logic_reg,logic_reg")] ) (define_insn "*thumb1_iorsi3_insn" @@ -3426,7 +3109,8 @@ "TARGET_THUMB1" "orr\\t%0, %2" [(set_attr "length" "2") - (set_attr "conds" "set")]) + (set_attr "conds" "set") + (set_attr "type" "logics_reg")]) (define_peephole2 [(match_scratch:SI 3 "r") @@ -3451,7 +3135,7 @@ "TARGET_32BIT" "orr%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) (define_insn "*iorsi3_compare0_scratch" @@ 
-3463,7 +3147,7 @@ "TARGET_32BIT" "orr%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) (define_expand "xordi3" @@ -3510,7 +3194,7 @@ }" [(set_attr "length" "*,8,8,8,8,*") - (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1") + (set_attr "type" "neon_int_1,multiple,multiple,multiple,multiple,neon_int_1") (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] ) @@ -3525,7 +3209,8 @@ #" [(set_attr "length" "4,8") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] ) (define_insn "*xordi_sesidi_di" @@ -3536,7 +3221,8 @@ "TARGET_32BIT" "#" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] ) (define_expand "xorsi3" @@ -3589,7 +3275,7 @@ [(set_attr "length" "4,4,4,16") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no,yes,no,no") - (set_attr "type" "arlo_imm,*,*,*")] + (set_attr "type" "logic_imm,logic_reg,logic_reg,multiple")] ) (define_insn "*thumb1_xorsi3_insn" @@ -3600,7 +3286,7 @@ "eor\\t%0, %2" [(set_attr "length" "2") (set_attr "conds" "set") - (set_attr "type" "arlo_imm")] + (set_attr "type" "logics_reg")] ) (define_insn "*xorsi3_compare0" @@ -3613,7 +3299,7 @@ "TARGET_32BIT" "eor%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) (define_insn "*xorsi3_compare0_scratch" @@ -3624,7 +3310,7 @@ "TARGET_32BIT" "teq%?\\t%0, %1" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,*")] + (set_attr "type" "logics_imm,logics_reg")] ) ; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C), @@ -3658,7 +3344,8 @@ [(set_attr "length" "8") (set_attr "ce_count" "2") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] ) ; ??? 
Are these four splitters still beneficial when the Thumb-2 bitfield @@ -3795,7 +3482,8 @@ "TARGET_32BIT" "bic%?\\t%0, %1, %1, asr #31" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] ) (define_insn "*smax_m1" @@ -3805,7 +3493,8 @@ "TARGET_32BIT" "orr%?\\t%0, %1, %1, asr #31" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] ) (define_insn_and_split "*arm_smax_insn" @@ -3826,7 +3515,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_expand "sminsi3" @@ -3854,7 +3544,8 @@ "TARGET_32BIT" "and%?\\t%0, %1, %1, asr #31" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] ) (define_insn_and_split "*arm_smin_insn" @@ -3875,7 +3566,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple,multiple")] ) (define_expand "umaxsi3" @@ -3907,7 +3599,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,8,12")] + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] ) (define_expand "uminsi3" @@ -3939,7 +3632,8 @@ (match_dup 2)))] "" [(set_attr "conds" "clob") - (set_attr "length" "8,8,12")] + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] ) (define_insn "*store_minmaxsi" @@ -4008,7 +3702,8 @@ (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 14) - (const_int 12)))] + (const_int 12))) + (set_attr "type" "multiple")] ) ; Reject the frame pointer in operand[1], since reloading this after @@ -4034,8 +3729,7 @@ (match_dup 2)))) (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)]) (set (match_dup 0) - (minus:SI (match_dup 1) - (match_dup 3))))] + (match_dup 6)))] { enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), operands[2], operands[3]); @@ -4048,12 +3742,17 @@ else rc = reverse_condition (rc); operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]); + if (CONST_INT_P (operands[3])) + operands[6] = plus_constant (SImode, operands[1], -INTVAL (operands[3])); + else + operands[6] = gen_rtx_MINUS (SImode, operands[1], operands[3]); } [(set_attr "conds" "clob") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 14) - (const_int 12)))] + (const_int 12))) + (set_attr "type" "multiple")] ) (define_code_iterator SAT [smin smax]) @@ -4082,7 +3781,8 @@ return "usat%?\t%0, %1, %3"; } [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "alus_imm")] ) (define_insn "*satsi_<SAT:code>_shift" @@ -4110,7 +3810,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "3") - (set_attr "type" "arlo_shift")]) + (set_attr "type" "logic_shift_reg")]) ;; Shift and rotation insns @@ -4188,7 +3888,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "ashlsi3" @@ -4213,7 +3914,7 @@ "TARGET_THUMB1" "lsl\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "type" "shift,shift_reg") + (set_attr "type" "shift_imm,shift_reg") (set_attr "conds" "set")]) (define_expand "ashrdi3" @@ -4285,7 +3986,8 @@ 
operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*rrx" @@ -4318,7 +4020,7 @@ "TARGET_THUMB1" "asr\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "type" "shift,shift_reg") + (set_attr "type" "shift_imm,shift_reg") (set_attr "conds" "set")]) (define_expand "lshrdi3" @@ -4390,7 +4092,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "lshrsi3" @@ -4415,7 +4118,7 @@ "TARGET_THUMB1" "lsr\\t%0, %1, %2" [(set_attr "length" "2") - (set_attr "type" "shift,shift_reg") + (set_attr "type" "shift_imm,shift_reg") (set_attr "conds" "set")]) (define_expand "rotlsi3" @@ -4477,7 +4180,7 @@ (set_attr "predicable_short_it" "yes,no,no") (set_attr "length" "4") (set_attr "shift" "1") - (set_attr "type" "arlo_shift_reg,arlo_shift,arlo_shift_reg")] + (set_attr "type" "alu_shift_reg,alu_shift_imm,alu_shift_reg")] ) (define_insn "*shiftsi3_compare" @@ -4492,7 +4195,7 @@ "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "arlo_shift,arlo_shift_reg")] + (set_attr "type" "alus_shift_imm,alus_shift_reg")] ) (define_insn "*shiftsi3_compare0" @@ -4507,7 +4210,7 @@ "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "arlo_shift,arlo_shift_reg")] + (set_attr "type" "alus_shift_imm,alus_shift_reg")] ) (define_insn "*shiftsi3_compare0_scratch" @@ -4521,7 +4224,7 @@ "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "shift,shift_reg")] + (set_attr "type" "shift_imm,shift_reg")] ) (define_insn "*not_shiftsi" @@ -4863,7 +4566,8 @@ "sbfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) (define_insn "extzv_t2" @@ -4875,7 +4579,8 @@ "ubfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] ) @@ -4941,7 +4646,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*thumb1_negdi2" @@ -4950,7 +4656,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB1" "mov\\t%R0, #0\;neg\\t%Q0, %Q1\;sbc\\t%R0, %R1" - [(set_attr "length" "6")] + [(set_attr "length" "6") + (set_attr "type" "multiple")] ) (define_expand "negsi2" @@ -4968,7 +4675,8 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no") (set_attr "arch" "t2,*") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb1_negsi2" @@ -4976,7 +4684,8 @@ (neg:SI (match_operand:SI 1 "register_operand" "l")))] "TARGET_THUMB1" "neg\\t%0, %1" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "type" "alu_imm")] ) (define_expand "negsf2" @@ -5034,7 +4743,8 @@ DONE; } [(set_attr "length" "8,8,4,4") - (set_attr "arch" "a,a,t2,t2")] + (set_attr "arch" "a,a,t2,t2") + (set_attr "type" "multiple")] ) (define_insn_and_split "*negdi_zero_extendsidi" @@ -5056,7 +4766,8 @@ operands[0] = gen_lowpart (SImode, operands[0]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] ;; length in thumb is 4 + 
(set_attr "length" "8") + (set_attr "type" "multiple")] ;; length in thumb is 4 ) ;; abssi2 doesn't really clobber the condition codes if a different register @@ -5141,7 +4852,8 @@ [(set_attr "conds" "clob,*") (set_attr "shift" "1") (set_attr "predicable" "no, yes") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb1_abssi2" @@ -5155,7 +4867,8 @@ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] "" - [(set_attr "length" "6")] + [(set_attr "length" "6") + (set_attr "type" "multiple")] ) (define_insn_and_split "*arm_neg_abssi2" @@ -5211,7 +4924,8 @@ [(set_attr "conds" "clob,*") (set_attr "shift" "1") (set_attr "predicable" "no, yes") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb1_neg_abssi2" @@ -5225,7 +4939,8 @@ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1))) (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] "" - [(set_attr "length" "6")] + [(set_attr "length" "6") + (set_attr "type" "multiple")] ) (define_expand "abssf2" @@ -5274,7 +4989,7 @@ }" [(set_attr "length" "*,8,8,*") (set_attr "predicable" "no,yes,yes,no") - (set_attr "neon_type" "neon_int_1,*,*,neon_int_1") + (set_attr "type" "neon_int_1,multiple,multiple,neon_int_1") (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] ) @@ -5461,7 +5176,8 @@ (set_attr "ce_count" "2") (set_attr "shift" "1") (set_attr "predicable" "yes") - (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")] + (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits") + (set_attr "type" "multiple,mov_reg,multiple,multiple,multiple")] ) ;; Splits for all extensions to DImode @@ -5597,7 +5313,7 @@ "@ # ldr%(h%)\\t%0, %1" - [(set_attr "type" "arlo_shift,load_byte") + [(set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes")] ) @@ -5618,7 +5334,7 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uxtah%?\\t%0, %2, %1" - [(set_attr "type" "arlo_shift") + [(set_attr "type" "alu_shift_reg") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no")] ) @@ -5668,7 +5384,7 @@ # ldrb\\t%0, %1" [(set_attr "length" "4,2") - (set_attr "type" "arlo_shift,load_byte") + (set_attr "type" "alu_shift_reg,load_byte") (set_attr "pool_range" "*,32")] ) @@ -5691,7 +5407,7 @@ # ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" [(set_attr "length" "8,4") - (set_attr "type" "arlo_shift,load_byte") + (set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes")] ) @@ -5714,7 +5430,7 @@ "uxtab%?\\t%0, %2, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alu_shift_reg")] ) (define_split @@ -5766,7 +5482,8 @@ "tst%?\\t%0, #255" [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_imm")] ) (define_expand "extendhisi2" @@ -5936,7 +5653,7 @@ # ldr%(sh%)\\t%0, %1" [(set_attr "length" "8,4") - (set_attr "type" "arlo_shift,load_byte") + (set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -5963,6 +5680,7 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sxtah%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg")] ) (define_expand "extendqihi2" @@ -6037,7 +5755,7 @@ # ldr%(sb%)\\t%0, %1" [(set_attr "length" 
"8,4") - (set_attr "type" "arlo_shift,load_byte") + (set_attr "type" "alu_shift_reg,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -6063,7 +5781,7 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sxtab%?\\t%0, %2, %1" - [(set_attr "type" "arlo_shift") + [(set_attr "type" "alu_shift_reg") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "no")] ) @@ -6283,7 +6001,7 @@ } " [(set_attr "length" "8,12,16,8,8") - (set_attr "type" "*,*,*,load2,store2") + (set_attr "type" "multiple,multiple,multiple,load2,store2") (set_attr "arm_pool_range" "*,*,*,1020,*") (set_attr "arm_neg_pool_range" "*,*,*,1004,*") (set_attr "thumb2_pool_range" "*,*,*,4094,*") @@ -6423,7 +6141,7 @@ } }" [(set_attr "length" "4,4,6,2,2,6,4,4") - (set_attr "type" "*,mov_reg,*,load2,store2,load2,store2,mov_reg") + (set_attr "type" "multiple,multiple,multiple,load2,store2,load2,store2,multiple") (set_attr "pool_range" "*,*,*,*,*,1018,*,*")] ) @@ -6521,7 +6239,8 @@ "movt%?\t%0, #:upper16:%c2" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "mov_imm")] ) (define_insn "*arm_movsi_insn" @@ -6593,7 +6312,7 @@ str\\t%1, %0 mov\\t%0, %1" [(set_attr "length" "2,2,4,4,2,2,2,2,2") - (set_attr "type" "*,*,*,*,load1,store1,load1,store1,*") + (set_attr "type" "mov_reg,mov_imm,multiple,multiple,load1,store1,load1,store1,mov_reg") (set_attr "pool_range" "*,*,*,*,*,*,1018,*,*") (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")]) @@ -6749,7 +6468,8 @@ INTVAL (operands[2])); return \"add\\t%0, %|pc\"; " - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "pic_add_dot_plus_eight" @@ -6764,7 +6484,8 @@ INTVAL (operands[2])); return \"add%?\\t%0, %|pc, %1\"; " - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "alu_reg")] ) (define_insn "tls_load_dot_plus_eight" @@ -6779,7 +6500,8 @@ INTVAL (operands[2])); return \"ldr%?\\t%0, [%|pc, %1]\t\t@ tls_load_dot_plus_eight\"; " - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "load1")] ) ;; PIC references to local variables can generate pic_add_dot_plus_eight @@ -6840,7 +6562,7 @@ cmp%?\\t%0, #0 sub%.\\t%0, %1, #0" [(set_attr "conds" "set") - (set_attr "type" "arlo_imm,arlo_imm")] + (set_attr "type" "alus_imm,alus_imm")] ) ;; Subroutine to store a half word from a register into memory. 
@@ -7186,7 +6908,7 @@ return \"ldrh %0, %1\"; }" [(set_attr "length" "2,4,2,2,2,2") - (set_attr "type" "*,load1,store1,*,*,*") + (set_attr "type" "alus_imm,load1,store1,mov_reg,mov_reg,mov_imm") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) @@ -7434,7 +7156,7 @@ mov\\t%0, %1 mov\\t%0, %1" [(set_attr "length" "2") - (set_attr "type" "arlo_imm,load1,store1,mov_reg,mov_imm,mov_imm") + (set_attr "type" "alu_imm,load1,store1,mov_reg,mov_imm,mov_imm") (set_attr "pool_range" "*,32,*,*,*,*") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) @@ -7499,7 +7221,7 @@ } " [(set_attr "conds" "unconditional") - (set_attr "type" "load1,store1,mov_reg,mov_reg") + (set_attr "type" "load1,store1,mov_reg,multiple") (set_attr "length" "4,4,4,8") (set_attr "predicable" "yes")] ) @@ -7612,7 +7334,7 @@ mov\\t%0, %1 mov\\t%0, %1" [(set_attr "length" "2") - (set_attr "type" "*,load1,store1,load1,store1,mov_reg,mov_reg") + (set_attr "type" "alus_imm,load1,store1,load1,store1,mov_reg,mov_reg") (set_attr "pool_range" "*,*,*,1018,*,*,*") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")] ) @@ -7700,7 +7422,7 @@ } " [(set_attr "length" "8,12,16,8,8") - (set_attr "type" "*,*,*,load2,store2") + (set_attr "type" "multiple,multiple,multiple,load2,store2") (set_attr "arm_pool_range" "*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*") (set_attr "arm_neg_pool_range" "*,*,*,1004,*") @@ -7744,7 +7466,7 @@ } " [(set_attr "length" "4,2,2,6,4,4") - (set_attr "type" "*,load2,store2,load2,store2,mov_reg") + (set_attr "type" "multiple,load2,store2,load2,store2,multiple") (set_attr "pool_range" "*,*,*,1018,*,*")] ) @@ -8052,7 +7774,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "cbranchsi4_scratch" @@ -8088,7 +7811,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*negated_cbranchsi4" @@ -8123,7 +7847,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*tbit_cbranch" @@ -8167,7 +7892,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*tlobits_cbranch" @@ -8211,7 +7937,8 @@ (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) (le (minus (match_dup 3) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*tstsi3_cbranch" @@ -8248,7 +7975,8 @@ (and (ge (minus (match_dup 2) (pc)) (const_int -2040)) (le (minus (match_dup 2) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) (define_insn "*cbranchne_decr1" @@ -8351,7 +8079,8 @@ (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) (le (minus (match_dup 4) (pc)) (const_int 2048))) (const_int 8) - (const_int 10)))])] + (const_int 10)))]) + (set_attr "type" "multiple")] ) (define_insn "*addsi3_cbranch" @@ -8432,7 +8161,8 @@ (and (ge (minus (match_dup 5) (pc)) (const_int -2038)) (le (minus (match_dup 5) (pc)) (const_int 2048))) (const_int 8) - (const_int 10)))))] + (const_int 
10))))) + (set_attr "type" "multiple")] ) (define_insn "*addsi3_cbranch_scratch" @@ -8500,7 +8230,8 @@ (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) (le (minus (match_dup 4) (pc)) (const_int 2048))) (const_int 6) - (const_int 8))))] + (const_int 8)))) + (set_attr "type" "multiple")] ) @@ -8520,34 +8251,34 @@ (set_attr "arch" "t2,t2,any,any") (set_attr "length" "2,2,4,4") (set_attr "predicable" "yes") - (set_attr "type" "*,*,*,arlo_imm")] + (set_attr "type" "alus_reg,alus_reg,alus_reg,alus_imm")] ) (define_insn "*cmpsi_shiftsi" [(set (reg:CC CC_REGNUM) - (compare:CC (match_operand:SI 0 "s_register_operand" "r,r") + (compare:CC (match_operand:SI 0 "s_register_operand" "r,r,r") (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "s_register_operand" "r,r") - (match_operand:SI 2 "shift_amount_operand" "M,rM")])))] + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")])))] "TARGET_32BIT" "cmp%?\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) (define_insn "*cmpsi_shiftsi_swp" [(set (reg:CC_SWP CC_REGNUM) (compare:CC_SWP (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "s_register_operand" "r,r") - (match_operand:SI 2 "shift_amount_operand" "M,rM")]) - (match_operand:SI 0 "s_register_operand" "r,r")))] + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")]) + (match_operand:SI 0 "s_register_operand" "r,r,r")))] "TARGET_32BIT" "cmp%?\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) (define_insn "*arm_cmpsi_negshiftsi_si" [(set (reg:CC_Z CC_REGNUM) @@ -8560,8 +8291,8 @@ "cmn%?\\t%0, %2%S1" [(set_attr "conds" "set") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg"))) + (const_string "alus_shift_imm") + (const_string "alus_shift_reg"))) (set_attr "predicable" "yes")] ) @@ -8603,7 +8334,8 @@ operands[2] = gen_lowpart (SImode, operands[2]); } [(set_attr "conds" "set") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*arm_cmpdi_unsigned" @@ -8631,7 +8363,8 @@ [(set_attr "conds" "set") (set_attr "enabled_for_depr_it" "yes,yes,no") (set_attr "arch" "t2,t2,*") - (set_attr "length" "6,6,8")] + (set_attr "length" "6,6,8") + (set_attr "type" "multiple")] ) (define_insn "*arm_cmpdi_zero" @@ -8641,7 +8374,8 @@ (clobber (match_scratch:SI 1 "=r"))] "TARGET_32BIT" "orr%.\\t%1, %Q0, %R0" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "type" "logics_reg")] ) (define_insn "*thumb_cmpdi_zero" @@ -8652,7 +8386,8 @@ "TARGET_THUMB1" "orr\\t%1, %Q0, %R0" [(set_attr "conds" "set") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "logics_reg")] ) ; This insn allows redundant compares to be removed by cse, nothing should @@ -8666,7 +8401,8 @@ "TARGET_32BIT" "\\t%@ deleted compare" [(set_attr "conds" "set") - (set_attr "length" "0")] + (set_attr "length" "0") + (set_attr "type" "no_insn")] ) @@ -8767,7 +8503,8 @@ (const_int 0)))] "" [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" 
"multiple")] ) (define_insn_and_split "*mov_negscc" @@ -8785,7 +8522,8 @@ operands[3] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*mov_notscc" @@ -8804,7 +8542,8 @@ operands[4] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_expand "cstoresi4" @@ -9009,7 +8748,8 @@ "@ neg\\t%0, %1\;adc\\t%0, %0, %1 neg\\t%2, %1\;adc\\t%0, %1, %2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_insn "*cstoresi_ne0_thumb1_insn" @@ -9029,7 +8769,8 @@ (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r"))))] "TARGET_THUMB1" "cmp\\t%1, %2\;sbc\\t%0, %0, %0" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_insn_and_split "cstoresi_ltu_thumb1" @@ -9043,7 +8784,8 @@ (neg:SI (ltu:SI (match_dup 1) (match_dup 2)))) (set (match_dup 0) (neg:SI (match_dup 3)))] "operands[3] = gen_reg_rtx (SImode);" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) ;; Used as part of the expansion of thumb les sequence. @@ -9055,7 +8797,8 @@ (match_operand:SI 4 "thumb1_cmp_operand" "lI"))))] "TARGET_THUMB1" "cmp\\t%3, %4\;adc\\t%0, %1, %2" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) @@ -9273,7 +9016,8 @@ (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) (le (minus (match_dup 0) (pc)) (const_int 2048)))) (const_int 2) - (const_int 4)))] + (const_int 4))) + (set_attr "type" "branch")] ) (define_insn "*thumb_jump" @@ -9295,7 +9039,8 @@ (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) (le (minus (match_dup 0) (pc)) (const_int 2048))) (const_int 2) - (const_int 4)))] + (const_int 4))) + (set_attr "type" "branch")] ) (define_expand "call" @@ -9779,7 +9524,8 @@ "TARGET_ARM" "teq\\t%|r0, %|r0\;teq\\t%|pc, %|pc" [(set_attr "length" "8") - (set_attr "conds" "set")] + (set_attr "conds" "set") + (set_attr "type" "multiple")] ) ;; Call subroutine returning any type. 
@@ -9970,7 +9716,8 @@ return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\"; " [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_expand "thumb1_casesi_internal_pic" @@ -10001,7 +9748,8 @@ (clobber (reg:SI LR_REGNUM))])] "TARGET_THUMB1" "* return thumb1_output_casesi(operands);" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "multiple")] ) (define_expand "indirect_jump" @@ -10027,7 +9775,8 @@ (match_operand:SI 0 "s_register_operand" "r"))] "TARGET_ARM" "mov%?\\t%|pc, %0\\t%@ indirect register jump" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "branch")] ) (define_insn "*load_indirect_jump" @@ -10048,7 +9797,8 @@ "TARGET_THUMB1" "mov\\tpc, %0" [(set_attr "conds" "clob") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "branch")] ) @@ -10067,7 +9817,8 @@ [(set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") (const_int 2) - (const_int 4)))] + (const_int 4))) + (set_attr "type" "mov_reg")] ) @@ -10103,7 +9854,7 @@ (if_then_else (match_operand:SI 3 "mult_operator" "") (const_string "no") (const_string "yes"))]) - (set_attr "type" "arlo_shift,arlo_shift,arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) (define_split [(set (match_operand:SI 0 "s_register_operand" "") @@ -10140,7 +9891,7 @@ [(set_attr "conds" "set") (set_attr "shift" "4") (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) (define_insn "*arith_shiftsi_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -10157,7 +9908,7 @@ [(set_attr "conds" "set") (set_attr "shift" "4") (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) (define_insn "*sub_shiftsi" [(set (match_operand:SI 0 "s_register_operand" "=r,r") @@ -10170,41 +9921,41 @@ [(set_attr "predicable" "yes") (set_attr "shift" "3") (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) (define_insn "*sub_shiftsi_compare0" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV - (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") (match_operator:SI 2 "shift_operator" - [(match_operand:SI 3 "s_register_operand" "r,r") - (match_operand:SI 4 "shift_amount_operand" "M,rM")])) + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r,M")])) (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=r,r") + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") (minus:SI (match_dup 1) (match_op_dup 2 [(match_dup 3) (match_dup 4)])))] "TARGET_32BIT" "sub%.\\t%0, %1, %3%S2" [(set_attr "conds" "set") (set_attr "shift" "3") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) (define_insn "*sub_shiftsi_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV - (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") (match_operator:SI 2 "shift_operator" - [(match_operand:SI 3 "s_register_operand" "r,r") - (match_operand:SI 4 "shift_amount_operand" "M,rM")])) + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 
4 "shift_amount_operand" "M,r,M")])) (const_int 0))) - (clobber (match_scratch:SI 0 "=r,r"))] + (clobber (match_scratch:SI 0 "=r,r,r"))] "TARGET_32BIT" "sub%.\\t%0, %1, %3%S2" [(set_attr "conds" "set") (set_attr "shift" "3") - (set_attr "arch" "32,a") - (set_attr "type" "arlo_shift,arlo_shift_reg")]) + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) (define_insn_and_split "*and_scc" @@ -10232,7 +9983,7 @@ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "type" "mov_reg") + (set_attr "type" "multiple") (set_attr "length" "8")] ) @@ -10266,7 +10017,8 @@ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "4,8")] + (set_attr "length" "4,8") + (set_attr "type" "logic_imm,multiple")] ) ; A series of splitters for the compare_scc pattern below. Note that @@ -10368,7 +10120,9 @@ else rc = reverse_condition (rc); operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); -}) +} + [(set_attr "type" "multiple")] +) ;; Attempt to improve the sequence generated by the compare_scc splitters ;; not to use conditional execution. @@ -10411,7 +10165,7 @@ (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] ) -;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 +;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 and optimising for speed. ;; sub Rd, Reg1, reg2 ;; clz Rd, Rd ;; lsr Rd, Rd, #5 @@ -10423,14 +10177,15 @@ (set (match_operand:SI 0 "register_operand" "") (const_int 0))) (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) (set (match_dup 0) (const_int 1)))] - "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM) + && !(TARGET_THUMB2 && optimize_insn_for_size_p ())" [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) (set (match_dup 0) (clz:SI (match_dup 0))) (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] ) -;; Rd = (eq (reg1) (reg2/imm)) // ! ARMv5 +;; Rd = (eq (reg1) (reg2)) // ! ARMv5 or optimising for size. 
;; sub T1, Reg1, reg2 ;; negs Rd, T1 ;; adc Rd, Rd, T1 @@ -10444,7 +10199,7 @@ (set (match_dup 0) (const_int 1))) (match_scratch:SI 3 "r")] "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" - [(set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2))) + [(set (match_dup 3) (match_dup 4)) (parallel [(set (reg:CC CC_REGNUM) (compare:CC (const_int 0) (match_dup 3))) @@ -10452,7 +10207,12 @@ (set (match_dup 0) (plus:SI (plus:SI (match_dup 0) (match_dup 3)) (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] -) + " + if (CONST_INT_P (operands[2])) + operands[4] = plus_constant (SImode, operands[1], -INTVAL (operands[2])); + else + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[2]); + ") (define_insn "*cond_move" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") @@ -10479,7 +10239,7 @@ return \"\"; " [(set_attr "conds" "use") - (set_attr "type" "mov_reg") + (set_attr "type" "mov_reg,mov_reg,multiple") (set_attr "length" "4,4,8")] ) @@ -10506,7 +10266,8 @@ return \"%i5%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*cond_sub" @@ -10524,7 +10285,8 @@ return \"sub%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*cmp_ite0" @@ -10588,6 +10350,7 @@ }" [(set_attr "conds" "set") (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr "type" "multiple") (set_attr_alternative "length" [(const_int 6) (const_int 8) @@ -10687,7 +10450,8 @@ (const_int 10)) (if_then_else (eq_attr "is_thumb" "no") (const_int 8) - (const_int 10))])] + (const_int 10))]) + (set_attr "type" "multiple")] ) (define_insn "*cmp_and" @@ -10768,7 +10532,8 @@ (const_int 10)) (if_then_else (eq_attr "is_thumb" "no") (const_int 8) - (const_int 10))])] + (const_int 10))]) + (set_attr "type" "multiple")] ) (define_insn "*cmp_ior" @@ -10849,7 +10614,8 @@ (const_int 10)) (if_then_else (eq_attr "is_thumb" "no") (const_int 8) - (const_int 10))])] + (const_int 10))]) + (set_attr "type" "multiple")] ) (define_insn_and_split "*ior_scc_scc" @@ -10878,7 +10644,9 @@ DOM_CC_X_OR_Y), CC_REGNUM);" [(set_attr "conds" "clob") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) ; If the above pattern is followed by a CMP insn, then the compare is ; redundant, since we can rework the conditional instruction that follows. @@ -10906,7 +10674,9 @@ (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] "" [(set_attr "conds" "set") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) (define_insn_and_split "*and_scc_scc" [(set (match_operand:SI 0 "s_register_operand" "=Ts") @@ -10936,7 +10706,9 @@ DOM_CC_X_AND_Y), CC_REGNUM);" [(set_attr "conds" "clob") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) ; If the above pattern is followed by a CMP insn, then the compare is ; redundant, since we can rework the conditional instruction that follows. 
@@ -10964,7 +10736,9 @@ (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] "" [(set_attr "conds" "set") - (set_attr "length" "16")]) + (set_attr "length" "16") + (set_attr "type" "multiple")] +) ;; If there is no dominance in the comparison, then we can still save an ;; instruction in the AND case, since we can know that the second compare @@ -10998,7 +10772,9 @@ operands[8] = gen_rtx_COMPARE (GET_MODE (operands[7]), operands[4], operands[5]);" [(set_attr "conds" "clob") - (set_attr "length" "20")]) + (set_attr "length" "20") + (set_attr "type" "multiple")] +) (define_split [(set (reg:CC_NOOV CC_REGNUM) @@ -11109,7 +10885,8 @@ FAIL; } [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn_and_split "movcond_addsi" @@ -11147,7 +10924,8 @@ } " [(set_attr "conds" "clob") - (set_attr "enabled_for_depr_it" "no,yes,yes")] + (set_attr "enabled_for_depr_it" "no,yes,yes") + (set_attr "type" "multiple")] ) (define_insn "movcond" @@ -11210,7 +10988,8 @@ return \"\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,8,12")] + (set_attr "length" "8,8,12") + (set_attr "type" "multiple")] ) ;; ??? The patterns below need checking for Thumb-2 usefulness. @@ -11228,7 +11007,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_plus_move" @@ -11250,11 +11030,11 @@ (set_attr "length" "4,4,8,8") (set_attr_alternative "type" [(if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_imm" ) - (const_string "*")) - (const_string "arlo_imm") - (const_string "*") - (const_string "*")])] + (const_string "alu_imm" ) + (const_string "alu_reg")) + (const_string "alu_imm") + (const_string "alu_reg") + (const_string "alu_reg")])] ) (define_insn "*ifcompare_move_plus" @@ -11270,7 +11050,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_plus" @@ -11290,13 +11071,7 @@ sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,8") - (set_attr_alternative "type" - [(if_then_else (match_operand 3 "const_int_operand" "") - (const_string "arlo_imm" ) - (const_string "*")) - (const_string "arlo_imm") - (const_string "*") - (const_string "*")])] + (set_attr "type" "alu_reg,alu_imm,multiple,multiple")] ) (define_insn "*ifcompare_arith_arith" @@ -11314,7 +11089,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_arith_arith" @@ -11330,7 +11106,8 @@ "TARGET_ARM" "%I6%d5\\t%0, %1, %2\;%I7%D5\\t%0, %3, %4" [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*ifcompare_arith_move" @@ -11371,7 +11148,8 @@ return \"\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_arith_move" @@ -11388,7 +11166,7 @@ %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,8") - (set_attr "type" "*,*")] + (set_attr "type" "alu_shift_reg,multiple")] ) (define_insn "*ifcompare_move_arith" @@ -11430,7 +11208,8 @@ return \"%I7%D6\\t%0, %2, %3\"; " [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_arith" @@ -11448,7 
+11227,7 @@ %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,8") - (set_attr "type" "*,*")] + (set_attr "type" "alu_shift_reg,multiple")] ) (define_insn "*ifcompare_move_not" @@ -11464,7 +11243,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_not" @@ -11481,7 +11261,8 @@ mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") (set_attr "type" "mvn_reg") - (set_attr "length" "4,8,8")] + (set_attr "length" "4,8,8") + (set_attr "type" "mvn_reg,multiple,multiple")] ) (define_insn "*ifcompare_not_move" @@ -11497,7 +11278,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_not_move" @@ -11513,7 +11295,7 @@ mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" [(set_attr "conds" "use") - (set_attr "type" "mvn_reg") + (set_attr "type" "mvn_reg,multiple,multiple") (set_attr "length" "4,8,8")] ) @@ -11531,7 +11313,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_shift_move" @@ -11551,9 +11334,7 @@ [(set_attr "conds" "use") (set_attr "shift" "2") (set_attr "length" "4,8,8") - (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "mov_shift") - (const_string "mov_shift_reg")))] + (set_attr "type" "mov_shift_reg,multiple,multiple")] ) (define_insn "*ifcompare_move_shift" @@ -11570,7 +11351,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_shift" @@ -11590,9 +11372,7 @@ [(set_attr "conds" "use") (set_attr "shift" "2") (set_attr "length" "4,8,8") - (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "mov_shift") - (const_string "mov_shift_reg")))] + (set_attr "type" "mov_shift_reg,multiple,multiple")] ) (define_insn "*ifcompare_shift_shift" @@ -11611,7 +11391,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_shift_shift" @@ -11651,7 +11432,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_not_arith" @@ -11684,7 +11466,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*if_arith_not" @@ -11699,7 +11482,7 @@ "TARGET_ARM" "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" [(set_attr "conds" "use") - (set_attr "type" "mvn_reg") + (set_attr "type" "multiple") (set_attr "length" "8")] ) @@ -11715,7 +11498,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_neg_move" @@ -11731,7 +11515,8 @@ mov%D4\\t%0, %1\;rsb%d4\\t%0, %2, #0 mvn%D4\\t%0, #%B1\;rsb%d4\\t%0, %2, #0" [(set_attr "conds" "use") - (set_attr "length" "4,8,8")] + (set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] ) (define_insn "*ifcompare_move_neg" @@ -11746,7 +11531,8 @@ "TARGET_ARM" "#" [(set_attr "conds" "clob") - (set_attr "length" "8,12")] + (set_attr "length" "8,12") + (set_attr "type" "multiple")] ) (define_insn "*if_move_neg" @@ 
-11762,7 +11548,8 @@ mov%d4\\t%0, %1\;rsb%D4\\t%0, %2, #0 mvn%d4\\t%0, #%B1\;rsb%D4\\t%0, %2, #0" [(set_attr "conds" "use") - (set_attr "length" "4,8,8")] + (set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] ) (define_insn "*arith_adjacentmem" @@ -11960,7 +11747,8 @@ [(unspec_volatile [(const_int 0)] VUNSPEC_THUMB1_INTERWORK)] "TARGET_THUMB1" "* return thumb1_output_interwork ();" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "multiple")] ) ;; Note - although unspec_volatile's USE all hard registers, @@ -12147,7 +11935,7 @@ mvn%D4\\t%0, %2 mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") - (set_attr "type" "mvn_reg") + (set_attr "type" "mvn_reg,multiple") (set_attr "length" "4,8")] ) @@ -12167,7 +11955,8 @@ return \"mvnne\\t%0, #0\"; " [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*not_signextract_onebit" @@ -12185,7 +11974,8 @@ return \"movne\\t%0, #0\"; " [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) ;; ??? The above patterns need auditing for Thumb-2 @@ -12247,7 +12037,8 @@ UNSPEC_PRLG_STK))] "" "" - [(set_attr "length" "0")] + [(set_attr "length" "0") + (set_attr "type" "block")] ) ;; Pop (as used in epilogue RTL) @@ -12377,6 +12168,7 @@ assemble_align (32); return \"\"; " + [(set_attr "type" "no_insn")] ) (define_insn "align_8" @@ -12386,6 +12178,7 @@ assemble_align (64); return \"\"; " + [(set_attr "type" "no_insn")] ) (define_insn "consttable_end" @@ -12395,6 +12188,7 @@ making_const_table = FALSE; return \"\"; " + [(set_attr "type" "no_insn")] ) (define_insn "consttable_1" @@ -12406,7 +12200,8 @@ assemble_zeros (3); return \"\"; " - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "no_insn")] ) (define_insn "consttable_2" @@ -12419,7 +12214,8 @@ assemble_zeros (2); return \"\"; " - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "no_insn")] ) (define_insn "consttable_4" @@ -12455,7 +12251,8 @@ } return \"\"; }" - [(set_attr "length" "4")] + [(set_attr "length" "4") + (set_attr "type" "no_insn")] ) (define_insn "consttable_8" @@ -12479,7 +12276,8 @@ } return \"\"; }" - [(set_attr "length" "8")] + [(set_attr "length" "8") + (set_attr "type" "no_insn")] ) (define_insn "consttable_16" @@ -12503,7 +12301,8 @@ } return \"\"; }" - [(set_attr "length" "16")] + [(set_attr "length" "16") + (set_attr "type" "no_insn")] ) ;; Miscellaneous Thumb patterns @@ -12531,7 +12330,8 @@ (use (label_ref (match_operand 1 "" "")))] "TARGET_THUMB1" "mov\\t%|pc, %0" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "type" "no_insn")] ) ;; V5 Instructions, @@ -12573,7 +12373,9 @@ (match_operand:SI 1 "" "") (match_operand:SI 2 "" ""))] "TARGET_32BIT && arm_arch5e" - "pld\\t%a0") + "pld\\t%a0" + [(set_attr "type" "load1")] +) ;; General predication pattern @@ -12590,7 +12392,8 @@ [(unspec:SI [(match_operand:SI 0 "register_operand" "")] UNSPEC_REGISTER_USE)] "" "%@ %0 needed" - [(set_attr "length" "0")] + [(set_attr "length" "0") + (set_attr "type" "no_insn")] ) @@ -12638,6 +12441,7 @@ thumb_set_return_address (operands[0], operands[1]); DONE; }" + [(set_attr "type" "mov_reg")] ) @@ -12648,7 +12452,8 @@ (unspec:SI [(const_int 0)] UNSPEC_TLS))] "TARGET_HARD_TP" "mrc%?\\tp15, 0, %0, c13, c0, 3\\t@ load_tp_hard" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "type" "mrs")] ) ;; Doesn't clobber R1-R3. 
Must use r0 for the first operand. @@ -12659,7 +12464,8 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_SOFT_TP" "bl\\t__aeabi_read_tp\\t@ load_tp_soft" - [(set_attr "conds" "clob")] + [(set_attr "conds" "clob") + (set_attr "type" "branch")] ) ;; tls descriptor call @@ -12678,7 +12484,8 @@ return "bl\\t%c0(tlscall)"; } [(set_attr "conds" "clob") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "branch")] ) ;; For thread pointer builtin @@ -12704,7 +12511,8 @@ "movt%?\t%0, %L1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "type" "mov_imm")] ) (define_insn "*arm_rev" @@ -12716,7 +12524,8 @@ rev%?\t%0, %1 rev%?\t%0, %1" [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] ) (define_expand "arm_legacy_rev" @@ -12816,7 +12625,8 @@ revsh%?\t%0, %1 revsh%?\t%0, %1" [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] ) (define_insn "*arm_rev16" @@ -12828,7 +12638,8 @@ rev16%?\t%0, %1 rev16%?\t%0, %1" [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] ) (define_expand "bswaphi2" diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md index 317e4cd4ad6..7df84d52481 100644 --- a/gcc/config/arm/arm1020e.md +++ b/gcc/config/arm/arm1020e.md @@ -66,14 +66,21 @@ ;; ALU operations with no shifted operand (define_insn_reservation "1020alu_op" 1 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) "1020a_e,1020a_m,1020a_w") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "1020alu_shift_op" 1 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "1020a_e,1020a_m,1020a_w") ;; ALU operations with a shift-by-register operand @@ -82,7 +89,9 @@ ;; the execute stage. (define_insn_reservation "1020alu_shift_reg_op" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "1020a_e*2,1020a_m,1020a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -270,7 +279,7 @@ ;; first execute state. We model this by using 1020a_e in the first cycle. 
(define_insn_reservation "v10_ffarith" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) "1020a_e+v10_fmac") (define_insn_reservation "v10_farith" 5 @@ -280,7 +289,7 @@ (define_insn_reservation "v10_cvt" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvti2f,f_cvtf2i")) "1020a_e+v10_fmac") (define_insn_reservation "v10_fmul" 6 @@ -290,12 +299,12 @@ (define_insn_reservation "v10_fdivs" 18 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "1020a_e+v10_ds*14") (define_insn_reservation "v10_fdivd" 32 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "1020a_e+v10_fmac+v10_ds*28") (define_insn_reservation "v10_floads" 4 @@ -316,7 +325,7 @@ (define_insn_reservation "v10_c2v" 4 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "1020a_e+1020l_e+v10_ls1,v10_ls2") (define_insn_reservation "v10_fstores" 1 @@ -331,7 +340,7 @@ (define_insn_reservation "v10_v2c" 1 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "1020a_e+1020l_e,1020l_m,1020l_w") (define_insn_reservation "v10_to_cpsr" 2 diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md index 9112122d67b..f5a0447f5da 100644 --- a/gcc/config/arm/arm1026ejs.md +++ b/gcc/config/arm/arm1026ejs.md @@ -66,14 +66,21 @@ ;; ALU operations with no shifted operand (define_insn_reservation "alu_op" 1 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) "a_e,a_m,a_w") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "alu_shift_op" 1 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "a_e,a_m,a_w") ;; ALU operations with a shift-by-register operand @@ -82,7 +89,9 @@ ;; the execute stage. 
(define_insn_reservation "alu_shift_reg_op" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "a_e*2,a_m,a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md index f83b9d14f2b..f6e0b8da8b6 100644 --- a/gcc/config/arm/arm1136jfs.md +++ b/gcc/config/arm/arm1136jfs.md @@ -75,14 +75,21 @@ ;; ALU operations with no shifted operand (define_insn_reservation "11_alu_op" 2 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) "e_1,e_2,e_3,e_wb") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "11_alu_shift_op" 2 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "e_1,e_2,e_3,e_wb") ;; ALU operations with a shift-by-register operand @@ -91,7 +98,9 @@ ;; the shift stage. (define_insn_reservation "11_alu_shift_reg_op" 3 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "e_1*2,e_2,e_3,e_wb") ;; alu_ops can start sooner, if there is no shifter dependency diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md index 8c38e86ce66..d2b0e9e3cf8 100644 --- a/gcc/config/arm/arm926ejs.md +++ b/gcc/config/arm/arm926ejs.md @@ -58,9 +58,16 @@ ;; ALU operations with no shifted operand (define_insn_reservation "9_alu_op" 1 (and (eq_attr "tune" "arm926ejs") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,extend,arlo_shift,\ + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + shift_imm,shift_reg,extend,\ mov_imm,mov_reg,mov_shift,\ - mvn_imm,mvn_reg,mvn_shift")) + mvn_imm,mvn_reg,mvn_shift,\ + multiple,no_insn")) "e,m,w") ;; ALU operations with a shift-by-register operand @@ -69,7 +76,9 @@ ;; the execute stage. 
(define_insn_reservation "9_alu_shift_reg_op" 2 (and (eq_attr "tune" "arm926ejs") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "e*2,m,w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a15-neon.md b/gcc/config/arm/cortex-a15-neon.md index bfa2f5e8818..6eb8268321a 100644 --- a/gcc/config/arm/cortex-a15-neon.md +++ b/gcc/config/arm/cortex-a15-neon.md @@ -93,389 +93,345 @@ (define_insn_reservation "cortex_a15_neon_int_1" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_1")) + (eq_attr "type" "neon_int_1")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_2" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_2")) + (eq_attr "type" "neon_int_2")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_3" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_3")) + (eq_attr "type" "neon_int_3")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_4" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_4")) + (eq_attr "type" "neon_int_4")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_int_5" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_int_5")) + (eq_attr "type" "neon_int_5")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_vqneg_vqabs" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vqneg_vqabs")) + (eq_attr "type" "neon_vqneg_vqabs")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_vmov" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vmov")) + (eq_attr "type" "neon_vmov")) "ca15_issue1,ca15_cx_ialu") (define_insn_reservation "cortex_a15_neon_vaba" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vaba")) + (eq_attr "type" "neon_vaba")) "ca15_issue1,ca15_cx_ialu_with_acc") (define_insn_reservation "cortex_a15_neon_vaba_qqq" 8 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_vaba_qqq")) + (eq_attr "type" "neon_vaba_qqq")) "ca15_issue2,ca15_cx_ialu_with_acc*2") (define_insn_reservation "cortex_a15_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mul_qqq_8_16_32_ddd_32" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + (eq_attr "type" "neon_mul_qqq_8_16_32_ddd_32")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" + (eq_attr "type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mla_qqq_8_16" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" + (eq_attr "type" "neon_mla_qqq_8_16")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation 
"cortex_a15_neon_mla_ddd_32_qqd_16_ddd_32_scalar_\ - qdd_64_32_long_scalar_qdd_64_32_long" 7 + qdd_64_32_lotype_qdd_64_32_long" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (eq_attr "type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mla_qqq_32_qqd_32_scalar" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mla_qqq_32_qqd_32_scalar")) + (eq_attr "type" "neon_mla_qqq_32_qqd_32_scalar")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mul_ddd_16_scalar_32_16_long_scalar")) + (eq_attr "type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_mul_qqd_32_scalar" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mul_qqd_32_scalar")) + (eq_attr "type" "neon_mul_qqd_32_scalar")) "ca15_issue1,ca15_cx_imac*2") (define_insn_reservation "cortex_a15_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + (eq_attr "type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) "ca15_issue1,ca15_cx_imac") (define_insn_reservation "cortex_a15_neon_shift_1" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_shift_1")) + (eq_attr "type" "neon_shift_1")) "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") (define_insn_reservation "cortex_a15_neon_shift_2" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_shift_2")) + (eq_attr "type" "neon_shift_2")) "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") (define_insn_reservation "cortex_a15_neon_shift_3" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_shift_3")) + (eq_attr "type" "neon_shift_3")) "ca15_issue2,(ca15_cx_ik+ca15_cx_ishf)*2") (define_insn_reservation "cortex_a15_neon_vshl_ddd" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vshl_ddd")) + (eq_attr "type" "neon_vshl_ddd")) "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") (define_insn_reservation "cortex_a15_neon_vqshl_vrshl_vqrshl_qqq" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vqshl_vrshl_vqrshl_qqq")) + (eq_attr "type" "neon_vqshl_vrshl_vqrshl_qqq")) "ca15_issue2,(ca15_cx_ik+ca15_cx_ishf)*2") (define_insn_reservation "cortex_a15_neon_vsra_vrsra" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vsra_vrsra")) + (eq_attr "type" "neon_vsra_vrsra")) "ca15_issue1,ca15_cx_ishf_with_acc") (define_insn_reservation "cortex_a15_neon_fp_vadd_ddd_vabs_dd" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vadd_ddd_vabs_dd")) + (eq_attr "type" "neon_fp_vadd_ddd_vabs_dd")) "ca15_issue1,ca15_cx_falu") (define_insn_reservation "cortex_a15_neon_fp_vadd_qqq_vabs_qq" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vadd_qqq_vabs_qq")) + (eq_attr "type" "neon_fp_vadd_qqq_vabs_qq")) "ca15_issue2,ca15_cx_falu_2") (define_insn_reservation "cortex_a15_neon_fp_vmul_ddd" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmul_ddd")) + (eq_attr "type" "neon_fp_vmul_ddd")) "ca15_issue1,ca15_cx_fmul") (define_insn_reservation "cortex_a15_neon_fp_vmul_qqd" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmul_qqd")) + (eq_attr "type" "neon_fp_vmul_qqd")) 
"ca15_issue2,ca15_cx_fmul_2") (define_insn_reservation "cortex_a15_neon_fp_vmla_ddd" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_ddd")) + (eq_attr "type" "neon_fp_vmla_ddd")) "ca15_issue1,ca15_cx_fmac") (define_insn_reservation "cortex_a15_neon_fp_vmla_qqq" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_qqq")) + (eq_attr "type" "neon_fp_vmla_qqq")) "ca15_issue2,ca15_cx_fmac_2") (define_insn_reservation "cortex_a15_neon_fp_vmla_ddd_scalar" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_ddd_scalar")) + (eq_attr "type" "neon_fp_vmla_ddd_scalar")) "ca15_issue1,ca15_cx_fmac") (define_insn_reservation "cortex_a15_neon_fp_vmla_qqq_scalar" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vmla_qqq_scalar")) + (eq_attr "type" "neon_fp_vmla_qqq_scalar")) "ca15_issue2,ca15_cx_fmac_2") (define_insn_reservation "cortex_a15_neon_fp_vrecps_vrsqrts_ddd" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vrecps_vrsqrts_ddd")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_ddd")) "ca15_issue1,ca15_cx_fmac") (define_insn_reservation "cortex_a15_neon_fp_vrecps_vrsqrts_qqq" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_fp_vrecps_vrsqrts_qqq")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_qqq")) "ca15_issue2,ca15_cx_fmac_2") (define_insn_reservation "cortex_a15_neon_bp_simple" 4 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_bp_simple")) + (eq_attr "type" "neon_bp_simple")) "ca15_issue3,ca15_ls+ca15_cx_perm_2,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_bp_2cycle" 4 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_bp_2cycle")) + (eq_attr "type" "neon_bp_2cycle")) "ca15_issue1,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_bp_3cycle" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_bp_3cycle")) + (eq_attr "type" "neon_bp_3cycle")) "ca15_issue3,ca15_cx_ialu+ca15_cx_perm_2,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_vld1_1_2_regs" 7 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld1_1_2_regs")) + (eq_attr "type" "neon_vld1_1_2_regs")) "ca15_issue2,ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld1_3_4_regs" 8 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld1_3_4_regs")) + (eq_attr "type" "neon_vld1_3_4_regs")) "ca15_issue3,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld2_2_regs_vld1_vld2_all_lanes" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld2_2_regs_vld1_vld2_all_lanes")) + (eq_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) "ca15_issue3,ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld2_4_regs" 12 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld2_4_regs")) + (eq_attr "type" "neon_vld2_4_regs")) "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") (define_insn_reservation "cortex_a15_neon_vld3_vld4" 12 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld3_vld4")) + (eq_attr "type" "neon_vld3_vld4")) "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") (define_insn_reservation "cortex_a15_neon_vst1_1_2_regs_vst2_2_regs" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst1_1_2_regs_vst2_2_regs")) + (eq_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")) "ca15_issue3,ca15_issue3+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") (define_insn_reservation "cortex_a15_neon_vst1_3_4_regs" 0 (and (eq_attr "tune" 
"cortexa15") - (eq_attr "neon_type" - "neon_vst1_3_4_regs")) + (eq_attr "type" "neon_vst1_3_4_regs")) "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_str*3") (define_insn_reservation "cortex_a15_neon_vst2_4_regs_vst3_vst4" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst2_4_regs_vst3_vst4")) + (eq_attr "type" "neon_vst2_4_regs_vst3_vst4")) "ca15_issue3,ca15_issue3+ca15_cx_perm_2+ca15_ls1+ca15_ls2,\ ca15_issue3+ca15_str,ca15_str*3") (define_insn_reservation "cortex_a15_neon_vst3_vst4" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst3_vst4")) + (eq_attr "type" "neon_vst3_vst4")) "ca15_issue3,ca15_issue3+ca15_cx_perm_2+ca15_ls1+ca15_ls2,ca15_str*4") (define_insn_reservation "cortex_a15_neon_vld1_vld2_lane" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld1_vld2_lane")) + (eq_attr "type" "neon_vld1_vld2_lane")) "ca15_issue3,ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_vld3_vld4_lane" 10 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld3_vld4_lane")) + (eq_attr "type" "neon_vld3_vld4_lane")) "ca15_issue3,ca15_issue3+ca15_ls,ca15_issue3+ca15_ldr") (define_insn_reservation "cortex_a15_neon_vst1_vst2_lane" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst1_vst2_lane")) + (eq_attr "type" "neon_vst1_vst2_lane")) "ca15_issue3,ca15_cx_perm+ca15_ls,ca15_str") (define_insn_reservation "cortex_a15_neon_vst3_vst4_lane" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vst3_vst4_lane")) + (eq_attr "type" "neon_vst3_vst4_lane")) "ca15_issue3,ca15_issue3+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") (define_insn_reservation "cortex_a15_neon_vld3_vld4_all_lanes" 11 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_vld3_vld4_all_lanes")) + (eq_attr "type" "neon_vld3_vld4_all_lanes")) "ca15_issue3,ca15_issue3+ca15_ls,ca15_ldr") (define_insn_reservation "cortex_a15_neon_ldm_2" 20 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_ldm_2")) + (eq_attr "type" "neon_ldm_2")) "ca15_issue3*6") (define_insn_reservation "cortex_a15_neon_stm_2" 0 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_stm_2")) + (eq_attr "type" "neon_stm_2")) "ca15_issue3*6") (define_insn_reservation "cortex_a15_neon_mcr" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mcr")) + (eq_attr "type" "neon_mcr")) "ca15_issue2,ca15_ls,ca15_cx_perm") (define_insn_reservation "cortex_a15_neon_mcr_2_mcrr" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mcr_2_mcrr")) + (eq_attr "type" "neon_mcr_2_mcrr")) "ca15_issue2,ca15_ls1+ca15_ls2") (define_insn_reservation "cortex_a15_neon_mrc" 5 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mrc")) + (eq_attr "type" "neon_mrc")) "ca15_issue1,ca15_ls") (define_insn_reservation "cortex_a15_neon_mrrc" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "neon_type" - "neon_mrrc")) + (eq_attr "type" "neon_mrrc")) "ca15_issue2,ca15_ls1+ca15_ls2") (define_insn_reservation "cortex_a15_vfp_const" 4 @@ -515,7 +471,7 @@ (define_insn_reservation "cortex_a15_vfp_cvt" 6 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "ca15_issue1,ca15_cx_vfp") (define_insn_reservation "cortex_a15_vfp_cmpd" 8 @@ -535,7 +491,7 @@ (define_insn_reservation "cortex_a15_vfp_cpys" 4 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "fcpys")) + (eq_attr "type" "fmov")) "ca15_issue1,ca15_cx_perm") (define_insn_reservation "cortex_a15_vfp_ariths" 7 @@ -545,12 +501,12 
@@ (define_insn_reservation "cortex_a15_vfp_divs" 10 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "ca15_issue1,ca15_cx_ik") (define_insn_reservation "cortex_a15_vfp_divd" 18 (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "ca15_issue1,ca15_cx_ik") ;; Define bypasses. diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md index 4ad87121d6d..ccad6207608 100644 --- a/gcc/config/arm/cortex-a15.md +++ b/gcc/config/arm/cortex-a15.md @@ -61,25 +61,32 @@ ;; Simple ALU without shift (define_insn_reservation "cortex_a15_alu" 2 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") ;; ALU ops with immediate shift (define_insn_reservation "cortex_a15_alu_shift" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "extend,arlo_shift,,mov_shift,mvn_shift") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + mov_shift,mvn_shift")) "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") ;; ALU ops with register controlled shift (define_insn_reservation "cortex_a15_alu_shift_reg" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") @@ -89,15 +96,13 @@ ;; 32-bit multiplies (define_insn_reservation "cortex_a15_mult32" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "mul32" "yes") - (eq_attr "neon_type" "none"))) + (eq_attr "mul32" "yes")) "ca15_issue1,ca15_mx") ;; 64-bit multiplies (define_insn_reservation "cortex_a15_mult64" 4 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "mul64" "yes") - (eq_attr "neon_type" "none"))) + (eq_attr "mul64" "yes")) "ca15_issue1,ca15_mx*2") ;; Integer divide @@ -114,8 +119,7 @@ ;; Block all issue pipes for a cycle (define_insn_reservation "cortex_a15_block" 1 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "block") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "block")) "ca15_issue3") ;; Branch execution Unit @@ -124,8 +128,7 @@ ;; No latency as there is no result (define_insn_reservation "cortex_a15_branch" 0 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "branch") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "branch")) "ca15_issue1,ca15_bx") ;; Load-store execution Unit @@ -133,29 +136,25 @@ ;; Loads of up to two words. (define_insn_reservation "cortex_a15_load1" 4 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "load_byte,load1,load2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load_byte,load1,load2")) "ca15_issue1,ca15_ls,ca15_ldr,nothing") ;; Loads of three or four words. 
(define_insn_reservation "cortex_a15_load3" 5 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "load3,load4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load3,load4")) "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") ;; Stores of up to two words. (define_insn_reservation "cortex_a15_store1" 0 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "store1,store2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store1,store2")) "ca15_issue1,ca15_ls,ca15_str") ;; Stores of three or four words. (define_insn_reservation "cortex_a15_store3" 0 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "store3,store4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store3,store4")) "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") ;; We include Neon.md here to ensure that the branch can block the Neon units. @@ -165,8 +164,7 @@ ;; pipeline. The result however is available the next cycle. (define_insn_reservation "cortex_a15_call" 1 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "call") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "call")) "ca15_issue3,\ ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx_ij+ca15_cx_ik+ca15_ls1+ca15_ls2+\ ca15_cx_imac1+ca15_cx_ialu1+ca15_cx_ialu2+ca15_cx_ishf+\ diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md index 1400c47d95a..22e0a08f38e 100644 --- a/gcc/config/arm/cortex-a5.md +++ b/gcc/config/arm/cortex-a5.md @@ -58,13 +58,22 @@ (define_insn_reservation "cortex_a5_alu" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "cortex_a5_ex1") (define_insn_reservation "cortex_a5_alu_shift" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "cortex_a5_ex1") @@ -159,7 +168,8 @@ (define_insn_reservation "cortex_a5_fpalu" 4 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\ + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ fcmps, fcmpd")) "cortex_a5_ex1+cortex_a5_fpadd_pipe") @@ -223,14 +233,14 @@ (define_insn_reservation "cortex_a5_fdivs" 14 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13") ;; ??? Similarly for fdivd. 
(define_insn_reservation "cortex_a5_fdivd" 29 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 28") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -243,12 +253,12 @@ (define_insn_reservation "cortex_a5_r2f" 4 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_a5_ex1") (define_insn_reservation "cortex_a5_f2r" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_a5_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md index 2f9107994c9..48d0d03853f 100644 --- a/gcc/config/arm/cortex-a53.md +++ b/gcc/config/arm/cortex-a53.md @@ -67,14 +67,22 @@ (define_insn_reservation "cortex_a53_alu" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,csel,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "cortex_a53_slot_any") (define_insn_reservation "cortex_a53_alu_shift" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "arlo_shift,arlo_shift_reg,\ - mov_shift,mov_shift_reg,\ + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "cortex_a53_slot_any") @@ -130,12 +138,12 @@ (define_insn_reservation "cortex_a53_load1" 3 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "load_byte,load1")) + (eq_attr "type" "load_byte,load1,load_acq")) "cortex_a53_slot_any+cortex_a53_ls") (define_insn_reservation "cortex_a53_store1" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "store1")) + (eq_attr "type" "store1,store_rel")) "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store") (define_insn_reservation "cortex_a53_load2" 3 @@ -201,8 +209,9 @@ (define_insn_reservation "cortex_a53_fpalu" 4 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\ - fcmps, fcmpd")) + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ + fcmps, fcmpd, fcsel")) "cortex_a53_slot0+cortex_a53_fpadd_pipe") (define_insn_reservation "cortex_a53_fconst" 2 @@ -230,12 +239,12 @@ (define_insn_reservation "cortex_a53_fdivs" 14 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13") (define_insn_reservation "cortex_a53_fdivd" 29 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -244,12 +253,12 @@ (define_insn_reservation "cortex_a53_r2f" 4 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_a53_slot0") (define_insn_reservation "cortex_a53_f2r" 2 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_a53_slot0") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md 
index e14413d5083..a72a88d90af 100644 --- a/gcc/config/arm/cortex-a7.md +++ b/gcc/config/arm/cortex-a7.md @@ -67,8 +67,7 @@ (define_insn_reservation "cortex_a7_branch" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "branch") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "branch")) "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch") ;; Call cannot dual-issue as an older instruction. It can dual-issue @@ -77,8 +76,7 @@ ;; cycle. (define_insn_reservation "cortex_a7_call" 1 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "call") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "call")) "(cortex_a7_ex2|cortex_a7_both)+cortex_a7_branch") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -88,27 +86,31 @@ ;; ALU instruction with an immediate operand can dual-issue. (define_insn_reservation "cortex_a7_alu_imm" 2 (and (eq_attr "tune" "cortexa7") - (and (ior (eq_attr "type" "arlo_imm,mov_imm,mvn_imm") - (ior (eq_attr "type" "extend") - (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") - (not (eq_attr "length" "8"))))) - (eq_attr "neon_type" "none"))) + (ior (eq_attr "type" "adr,alu_imm,alus_imm,logic_imm,logics_imm,\ + mov_imm,mvn_imm,extend") + (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") + (not (eq_attr "length" "8"))))) "cortex_a7_ex2|cortex_a7_ex1") ;; ALU instruction with register operands can dual-issue ;; with a younger immediate-based instruction. (define_insn_reservation "cortex_a7_alu_reg" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "arlo_reg,shift,shift_reg,mov_reg,mvn_reg") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + bfm,rev,\ + shift_imm,shift_reg,mov_reg,mvn_reg")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_alu_shift" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "arlo_shift,arlo_shift_reg,\ - mov_shift,mov_shift_reg,\ - mvn_shift,mvn_shift_reg") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) "cortex_a7_ex1") ;; Forwarding path for unshifted operands. 
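;; Illustrative reconstruction, not part of the patch: the change applied throughout these scheduling descriptions is mechanical. The coarse arlo_*/shift classes and the separate neon_type attribute are dropped in favour of the finer-grained type values, so (reassembled from the hunk above for readability) a reservation such as cortex_a7_alu_reg goes from
;;
;;   (define_insn_reservation "cortex_a7_alu_reg" 2
;;     (and (eq_attr "tune" "cortexa7")
;;          (and (eq_attr "type" "arlo_reg,shift,shift_reg,mov_reg,mvn_reg")
;;               (eq_attr "neon_type" "none")))
;;     "cortex_a7_ex1")
;;
;; to
;;
;;   (define_insn_reservation "cortex_a7_alu_reg" 2
;;     (and (eq_attr "tune" "cortexa7")
;;          (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\
;;                           adc_imm,adcs_imm,adc_reg,adcs_reg,\
;;                           bfm,rev,\
;;                           shift_imm,shift_reg,mov_reg,mvn_reg"))
;;     "cortex_a7_ex1")
;;
;; with the neon_type guard gone because NEON classifications now live in the same type attribute.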
@@ -129,9 +131,8 @@ (define_insn_reservation "cortex_a7_mul" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "neon_type" "none") - (ior (eq_attr "mul32" "yes") - (eq_attr "mul64" "yes")))) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) "cortex_a7_both") ;; Forward the result of a multiply operation to the accumulator @@ -156,50 +157,42 @@ (define_insn_reservation "cortex_a7_load1" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load_byte,load1") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load_byte,load1")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_store1" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store1") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store1")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_load2" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load2")) "cortex_a7_both") (define_insn_reservation "cortex_a7_store2" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store2") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store2")) "cortex_a7_both") (define_insn_reservation "cortex_a7_load3" 3 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load3") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load3")) "cortex_a7_both, cortex_a7_ex1") (define_insn_reservation "cortex_a7_store3" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store4")) "cortex_a7_both, cortex_a7_ex1") (define_insn_reservation "cortex_a7_load4" 3 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "load4") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "load4")) "cortex_a7_both, cortex_a7_both") (define_insn_reservation "cortex_a7_store4" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "store3") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "store3")) "cortex_a7_both, cortex_a7_both") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -211,9 +204,8 @@ (define_insn_reservation "cortex_a7_fpalu" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\ - f_cvt, fcmps, fcmpd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,\ + f_cvt, f_cvtf2i, f_cvti2f, fcmps, fcmpd")) "cortex_a7_ex1+cortex_a7_fpadd_pipe") ;; For fconsts and fconstd, 8-bit immediate data is passed directly from @@ -221,8 +213,7 @@ (define_insn_reservation "cortex_a7_fconst" 3 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fconsts,fconstd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fconsts,fconstd")) "cortex_a7_ex1+cortex_a7_fpadd_pipe") ;; We should try not to attempt to issue a single-precision multiplication in @@ -231,13 +222,12 @@ (define_insn_reservation "cortex_a7_fpmuls" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmuls") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmuls")) "cortex_a7_ex1+cortex_a7_fpmul_pipe") (define_insn_reservation "cortex_a7_neon_mul" 4 (and (eq_attr "tune" "cortexa7") - (eq_attr "neon_type" + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ neon_mul_qqq_8_16_32_ddd_32,\ neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ @@ -249,13 +239,12 @@ (define_insn_reservation "cortex_a7_fpmacs" 8 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmacs,ffmas") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmacs,ffmas")) "cortex_a7_ex1+cortex_a7_fpmul_pipe") 
(define_insn_reservation "cortex_a7_neon_mla" 8 (and (eq_attr "tune" "cortexa7") - (eq_attr "neon_type" + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ neon_mla_qqq_8_16,\ neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ @@ -276,20 +265,17 @@ (define_insn_reservation "cortex_a7_fpmuld" 7 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmuld") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmuld")) "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") (define_insn_reservation "cortex_a7_fpmacd" 11 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmacd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fmacd")) "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") (define_insn_reservation "cortex_a7_fpfmad" 8 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "ffmad") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "ffmad")) "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") (define_bypass 7 "cortex_a7_fpmacd" @@ -302,14 +288,12 @@ (define_insn_reservation "cortex_a7_fdivs" 16 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fdivs") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 13") (define_insn_reservation "cortex_a7_fdivd" 31 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fdivd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 28") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -320,14 +304,12 @@ (define_insn_reservation "cortex_a7_r2f" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "r_2_f") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_a7_both") (define_insn_reservation "cortex_a7_f2r" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_2_r") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_a7_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -339,8 +321,7 @@ (define_insn_reservation "cortex_a7_f_flags" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_flag") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_flag")) "cortex_a7_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -349,26 +330,22 @@ (define_insn_reservation "cortex_a7_f_loads" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_loads") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_loads")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_f_loadd" 4 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_loadd") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_loadd")) "cortex_a7_both") (define_insn_reservation "cortex_a7_f_stores" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_stores") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_stores")) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_f_stored" 0 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "f_stored") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "f_stored")) "cortex_a7_both") ;; Load-to-use for floating-point values has a penalty of one cycle, @@ -389,22 +366,21 @@ (define_insn_reservation "cortex_a7_neon" 4 (and (eq_attr "tune" "cortexa7") - (eq_attr "neon_type" - "!none,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ - 
neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ - neon_mla_qqq_32_qqd_32_scalar,\ - neon_mul_ddd_16_scalar_32_16_long_scalar,\ - neon_mul_qqd_32_scalar,\ - neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ - neon_fp_vmul_ddd,\ - neon_fp_vmul_qqd,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vmla_ddd_scalar,\ - neon_fp_vmla_qqq_scalar")) + (eq_attr "type" + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_qqq_scalar")) "cortex_a7_both*2") diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md index 2f0cc7b3a5a..b7773891669 100644 --- a/gcc/config/arm/cortex-a8-neon.md +++ b/gcc/config/arm/cortex-a8-neon.md @@ -159,12 +159,12 @@ (define_insn_reservation "cortex_a8_vfp_divs" 37 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_a8_vfp,cortex_a8_vfplite*36") (define_insn_reservation "cortex_a8_vfp_divd" 65 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_a8_vfp,cortex_a8_vfplite*64") ;; Comparisons can actually take 7 cycles sometimes instead of four, @@ -172,24 +172,24 @@ ;; take four cycles, we pick that latency. (define_insn_reservation "cortex_a8_vfp_farith" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) "cortex_a8_vfp,cortex_a8_vfplite*3") (define_insn_reservation "cortex_a8_vfp_cvt" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "cortex_a8_vfp,cortex_a8_vfplite*6") ;; NEON -> core transfers. (define_insn_reservation "cortex_a8_neon_mrc" 20 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mrc")) + (eq_attr "type" "neon_mrc")) "cortex_a8_neon_ls") (define_insn_reservation "cortex_a8_neon_mrrc" 21 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mrrc")) + (eq_attr "type" "neon_mrrc")) "cortex_a8_neon_ls_2") ;; The remainder of this file is auto-generated by neon-schedgen. @@ -198,48 +198,48 @@ ;; produce a result at N3. (define_insn_reservation "cortex_a8_neon_int_1" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_1")) + (eq_attr "type" "neon_int_1")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N3. (define_insn_reservation "cortex_a8_neon_int_2" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_2")) + (eq_attr "type" "neon_int_2")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. 
(define_insn_reservation "cortex_a8_neon_int_3" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_3")) + (eq_attr "type" "neon_int_3")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N4. (define_insn_reservation "cortex_a8_neon_int_4" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_4")) + (eq_attr "type" "neon_int_4")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N4. (define_insn_reservation "cortex_a8_neon_int_5" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_int_5")) + (eq_attr "type" "neon_int_5")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vqneg_vqabs")) + (eq_attr "type" "neon_vqneg_vqabs")) "cortex_a8_neon_dp") ;; Instructions using this reservation produce a result at N3. (define_insn_reservation "cortex_a8_neon_vmov" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vmov")) + (eq_attr "type" "neon_vmov")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -247,7 +247,7 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a8_neon_vaba" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vaba")) + (eq_attr "type" "neon_vaba")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -255,35 +255,35 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_vaba_qqq" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vaba_qqq")) + (eq_attr "type" "neon_vaba_qqq")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. (define_insn_reservation "cortex_a8_neon_vsma" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vsma")) + (eq_attr "type" "neon_vsma")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6. (define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + (eq_attr "type" "neon_mul_qqq_8_16_32_ddd_32")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + (eq_attr "type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -291,7 +291,7 @@ ;; produce a result at N6. 
(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -299,7 +299,7 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_qqq_8_16")) + (eq_attr "type" "neon_mla_qqq_8_16")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -307,7 +307,7 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (eq_attr "type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -315,21 +315,21 @@ ;; produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + (eq_attr "type" "neon_mla_qqq_32_qqd_32_scalar")) "cortex_a8_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6. (define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + (eq_attr "type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + (eq_attr "type" "neon_mul_qqd_32_scalar")) "cortex_a8_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -337,84 +337,84 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + (eq_attr "type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. (define_insn_reservation "cortex_a8_neon_shift_1" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_shift_1")) + (eq_attr "type" "neon_shift_1")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a8_neon_shift_2" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_shift_2")) + (eq_attr "type" "neon_shift_2")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3 on cycle 2. 
(define_insn_reservation "cortex_a8_neon_shift_3" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_shift_3")) + (eq_attr "type" "neon_shift_3")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N1. (define_insn_reservation "cortex_a8_neon_vshl_ddd" 1 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vshl_ddd")) + (eq_attr "type" "neon_vshl_ddd")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4 on cycle 2. (define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + (eq_attr "type" "neon_vqshl_vrshl_vqrshl_qqq")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. (define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vsra_vrsra")) + (eq_attr "type" "neon_vsra_vrsra")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5. (define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + (eq_attr "type" "neon_fp_vadd_ddd_vabs_dd")) "cortex_a8_neon_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + (eq_attr "type" "neon_fp_vadd_qqq_vabs_qq")) "cortex_a8_neon_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N5. (define_insn_reservation "cortex_a8_neon_fp_vsum" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vsum")) + (eq_attr "type" "neon_fp_vsum")) "cortex_a8_neon_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5. (define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmul_ddd")) + (eq_attr "type" "neon_fp_vmul_ddd")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmul_qqd")) + (eq_attr "type" "neon_fp_vmul_qqd")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -422,7 +422,7 @@ ;; produce a result at N9. (define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_ddd")) + (eq_attr "type" "neon_fp_vmla_ddd")) "cortex_a8_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -430,7 +430,7 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_qqq")) + (eq_attr "type" "neon_fp_vmla_qqq")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -438,7 +438,7 @@ ;; produce a result at N9. 
(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + (eq_attr "type" "neon_fp_vmla_ddd_scalar")) "cortex_a8_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -446,152 +446,152 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + (eq_attr "type" "neon_fp_vmla_qqq_scalar")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9. (define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_ddd")) "cortex_a8_neon_fmul_then_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_qqq")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2. (define_insn_reservation "cortex_a8_neon_bp_simple" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_bp_simple")) + (eq_attr "type" "neon_bp_simple")) "cortex_a8_neon_perm") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a8_neon_bp_2cycle" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_bp_2cycle")) + (eq_attr "type" "neon_bp_2cycle")) "cortex_a8_neon_perm_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a8_neon_bp_3cycle" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_bp_3cycle")) + (eq_attr "type" "neon_bp_3cycle")) "cortex_a8_neon_perm_3") ;; Instructions using this reservation produce a result at N1. (define_insn_reservation "cortex_a8_neon_ldr" 1 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_ldr")) + (eq_attr "type" "neon_ldr")) "cortex_a8_neon_ls") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_str" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_str")) + (eq_attr "type" "neon_str")) "cortex_a8_neon_ls") ;; Instructions using this reservation produce a result at N1 on cycle 2. (define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld1_1_2_regs")) + (eq_attr "type" "neon_vld1_1_2_regs")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation produce a result at N1 on cycle 3. (define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld1_3_4_regs")) + (eq_attr "type" "neon_vld1_3_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. 
(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + (eq_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld2_4_regs")) + (eq_attr "type" "neon_vld2_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 4. (define_insn_reservation "cortex_a8_neon_vld3_vld4" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld3_vld4")) + (eq_attr "type" "neon_vld3_vld4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + (eq_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst1_3_4_regs")) + (eq_attr "type" "neon_vst1_3_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + (eq_attr "type" "neon_vst2_4_regs_vst3_vst4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst3_vst4" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst3_vst4")) + (eq_attr "type" "neon_vst3_vst4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld1_vld2_lane")) + (eq_attr "type" "neon_vld1_vld2_lane")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 5. (define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld3_vld4_lane")) + (eq_attr "type" "neon_vld3_vld4_lane")) "cortex_a8_neon_ls_5") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst1_vst2_lane")) + (eq_attr "type" "neon_vst1_vst2_lane")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vst3_vst4_lane")) + (eq_attr "type" "neon_vst3_vst4_lane")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a8_neon_vld3_vld4_all_lanes" 3 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + (eq_attr "type" "neon_vld3_vld4_all_lanes")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2. 
(define_insn_reservation "cortex_a8_neon_mcr" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mcr")) + (eq_attr "type" "neon_mcr")) "cortex_a8_neon_perm") ;; Instructions using this reservation produce a result at N2. (define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "neon_type" "neon_mcr_2_mcrr")) + (eq_attr "type" "neon_mcr_2_mcrr")) "cortex_a8_neon_perm_2") ;; Exceptions to the default latencies. diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md index 1113a45ff0e..1eade5e1244 100644 --- a/gcc/config/arm/cortex-a8.md +++ b/gcc/config/arm/cortex-a8.md @@ -85,19 +85,25 @@ ;; (source read in E2 and destination available at the end of that cycle). (define_insn_reservation "cortex_a8_alu" 2 (and (eq_attr "tune" "cortexa8") - (ior (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") - (eq_attr "neon_type" "none")) - (eq_attr "type" "clz"))) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,clz,rbit,rev,\ + shift_imm,shift_reg,\ + multiple,no_insn")) "cortex_a8_default") (define_insn_reservation "cortex_a8_alu_shift" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "extend,arlo_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend")) "cortex_a8_default") (define_insn_reservation "cortex_a8_alu_shift_reg" 2 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "arlo_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) "cortex_a8_default") ;; Move instructions. @@ -105,7 +111,8 @@ (define_insn_reservation "cortex_a8_mov" 1 (and (eq_attr "tune" "cortexa8") (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs")) "cortex_a8_default") ;; Exceptions to the default latencies for data processing instructions. diff --git a/gcc/config/arm/cortex-a9-neon.md b/gcc/config/arm/cortex-a9-neon.md index 9688edc8f72..2c9d5db5bd8 100644 --- a/gcc/config/arm/cortex-a9-neon.md +++ b/gcc/config/arm/cortex-a9-neon.md @@ -109,12 +109,12 @@ ;; NEON -> core transfers. (define_insn_reservation "ca9_neon_mrc" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mrc")) + (eq_attr "type" "neon_mrc")) "ca9_issue_vfp_neon + cortex_a9_neon_mcr") (define_insn_reservation "ca9_neon_mrrc" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mrrc")) + (eq_attr "type" "neon_mrrc")) "ca9_issue_vfp_neon + cortex_a9_neon_mcr") ;; The remainder of this file is auto-generated by neon-schedgen. @@ -123,48 +123,48 @@ ;; produce a result at N3. (define_insn_reservation "cortex_a9_neon_int_1" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_1")) + (eq_attr "type" "neon_int_1")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N3. (define_insn_reservation "cortex_a9_neon_int_2" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_2")) + (eq_attr "type" "neon_int_2")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. 
(define_insn_reservation "cortex_a9_neon_int_3" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_3")) + (eq_attr "type" "neon_int_3")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N4. (define_insn_reservation "cortex_a9_neon_int_4" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_4")) + (eq_attr "type" "neon_int_4")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N4. (define_insn_reservation "cortex_a9_neon_int_5" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_int_5")) + (eq_attr "type" "neon_int_5")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vqneg_vqabs")) + (eq_attr "type" "neon_vqneg_vqabs")) "cortex_a9_neon_dp") ;; Instructions using this reservation produce a result at N3. (define_insn_reservation "cortex_a9_neon_vmov" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vmov")) + (eq_attr "type" "neon_vmov")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -172,7 +172,7 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a9_neon_vaba" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vaba")) + (eq_attr "type" "neon_vaba")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -180,35 +180,35 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_vaba_qqq" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vaba_qqq")) + (eq_attr "type" "neon_vaba_qqq")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. (define_insn_reservation "cortex_a9_neon_vsma" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vsma")) + (eq_attr "type" "neon_vsma")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6. (define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + (eq_attr "type" "neon_mul_qqq_8_16_32_ddd_32")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + (eq_attr "type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -216,7 +216,7 @@ ;; produce a result at N6. 
(define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + (eq_attr "type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -224,7 +224,7 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_qqq_8_16")) + (eq_attr "type" "neon_mla_qqq_8_16")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -232,7 +232,7 @@ ;; produce a result at N6 on cycle 2. (define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (eq_attr "type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -240,21 +240,21 @@ ;; produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + (eq_attr "type" "neon_mla_qqq_32_qqd_32_scalar")) "cortex_a9_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6. (define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + (eq_attr "type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. (define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + (eq_attr "type" "neon_mul_qqd_32_scalar")) "cortex_a9_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -262,84 +262,84 @@ ;; produce a result at N6. (define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + (eq_attr "type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. (define_insn_reservation "cortex_a9_neon_shift_1" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_shift_1")) + (eq_attr "type" "neon_shift_1")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. (define_insn_reservation "cortex_a9_neon_shift_2" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_shift_2")) + (eq_attr "type" "neon_shift_2")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3 on cycle 2. 
(define_insn_reservation "cortex_a9_neon_shift_3" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_shift_3")) + (eq_attr "type" "neon_shift_3")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N1. (define_insn_reservation "cortex_a9_neon_vshl_ddd" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vshl_ddd")) + (eq_attr "type" "neon_vshl_ddd")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4 on cycle 2. (define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + (eq_attr "type" "neon_vqshl_vrshl_vqrshl_qqq")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. (define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vsra_vrsra")) + (eq_attr "type" "neon_vsra_vrsra")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5. (define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + (eq_attr "type" "neon_fp_vadd_ddd_vabs_dd")) "cortex_a9_neon_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + (eq_attr "type" "neon_fp_vadd_qqq_vabs_qq")) "cortex_a9_neon_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N5. (define_insn_reservation "cortex_a9_neon_fp_vsum" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vsum")) + (eq_attr "type" "neon_fp_vsum")) "cortex_a9_neon_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5. (define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmul_ddd")) + (eq_attr "type" "neon_fp_vmul_ddd")) "cortex_a9_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmul_qqd")) + (eq_attr "type" "neon_fp_vmul_qqd")) "cortex_a9_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -347,7 +347,7 @@ ;; produce a result at N9. (define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_ddd")) + (eq_attr "type" "neon_fp_vmla_ddd")) "cortex_a9_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -355,7 +355,7 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_qqq")) + (eq_attr "type" "neon_fp_vmla_qqq")) "cortex_a9_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -363,7 +363,7 @@ ;; produce a result at N9. 
(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + (eq_attr "type" "neon_fp_vmla_ddd_scalar")) "cortex_a9_neon_fmul_then_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, @@ -371,152 +371,152 @@ ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + (eq_attr "type" "neon_fp_vmla_qqq_scalar")) "cortex_a9_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9. (define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_ddd")) "cortex_a9_neon_fmul_then_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9 on cycle 2. (define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + (eq_attr "type" "neon_fp_vrecps_vrsqrts_qqq")) "cortex_a9_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2. (define_insn_reservation "cortex_a9_neon_bp_simple" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_bp_simple")) + (eq_attr "type" "neon_bp_simple")) "cortex_a9_neon_perm") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a9_neon_bp_2cycle" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_bp_2cycle")) + (eq_attr "type" "neon_bp_2cycle")) "cortex_a9_neon_perm_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a9_neon_bp_3cycle" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_bp_3cycle")) + (eq_attr "type" "neon_bp_3cycle")) "cortex_a9_neon_perm_3") ;; Instructions using this reservation produce a result at N1. (define_insn_reservation "cortex_a9_neon_ldr" 1 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_ldr")) + (eq_attr "type" "neon_ldr")) "cortex_a9_neon_ls") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_str" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_str")) + (eq_attr "type" "neon_str")) "cortex_a9_neon_ls") ;; Instructions using this reservation produce a result at N1 on cycle 2. (define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld1_1_2_regs")) + (eq_attr "type" "neon_vld1_1_2_regs")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation produce a result at N1 on cycle 3. (define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld1_3_4_regs")) + (eq_attr "type" "neon_vld1_3_4_regs")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. 
(define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + (eq_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld2_4_regs")) + (eq_attr "type" "neon_vld2_4_regs")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 4. (define_insn_reservation "cortex_a9_neon_vld3_vld4" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld3_vld4")) + (eq_attr "type" "neon_vld3_vld4")) "cortex_a9_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + (eq_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst1_3_4_regs")) + (eq_attr "type" "neon_vst1_3_4_regs")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + (eq_attr "type" "neon_vst2_4_regs_vst3_vst4")) "cortex_a9_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst3_vst4" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst3_vst4")) + (eq_attr "type" "neon_vst3_vst4")) "cortex_a9_neon_ls_4") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. (define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld1_vld2_lane")) + (eq_attr "type" "neon_vld1_vld2_lane")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 5. (define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld3_vld4_lane")) + (eq_attr "type" "neon_vld3_vld4_lane")) "cortex_a9_neon_ls_5") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst1_vst2_lane")) + (eq_attr "type" "neon_vst1_vst2_lane")) "cortex_a9_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. (define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vst3_vst4_lane")) + (eq_attr "type" "neon_vst3_vst4_lane")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. (define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + (eq_attr "type" "neon_vld3_vld4_all_lanes")) "cortex_a9_neon_ls_3") ;; Instructions using this reservation produce a result at N2. 
(define_insn_reservation "cortex_a9_neon_mcr" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mcr")) + (eq_attr "type" "neon_mcr")) "cortex_a9_neon_perm") ;; Instructions using this reservation produce a result at N2. (define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "neon_type" "neon_mcr_2_mcrr")) + (eq_attr "type" "neon_mcr_2_mcrr")) "cortex_a9_neon_perm_2") ;; Exceptions to the default latencies. diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md index 11dc0b32c38..7c62d8489ae 100644 --- a/gcc/config/arm/cortex-a9.md +++ b/gcc/config/arm/cortex-a9.md @@ -80,17 +80,24 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") ;; which can go down E2 without any problem. (define_insn_reservation "cortex_a9_dp" 2 (and (eq_attr "tune" "cortexa9") - (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mov_shift_reg,mov_shift") - (eq_attr "neon_type" "none"))) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mov_shift_reg,mov_shift,\ + mrs,multiple,no_insn")) "cortex_a9_p0_default|cortex_a9_p1_default") ;; An instruction using the shifter will go down E1. (define_insn_reservation "cortex_a9_dp_shift" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "arlo_shift_reg,extend,arlo_shift,\ - mvn_shift,mvn_shift_reg")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mvn_shift,mvn_shift_reg")) "cortex_a9_p0_shift | cortex_a9_p1_shift") ;; Loads have a latency of 4 cycles. @@ -200,7 +207,7 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") ;; Pipelining for VFP instructions. ;; Issue happens either along load store unit or the VFP / Neon unit. ;; Pipeline Instruction Classification. -;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r +;; FPS - fmov, ffariths, ffarithd,f_mcr,f_mcrr,f_mrc,f_mrrc ;; FP_ADD - fadds, faddd, fcmps (1) ;; FPMUL - fmul{s,d}, fmac{s,d}, ffma{s,d} ;; FPDIV - fdiv{s,d} @@ -213,7 +220,8 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") ;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. (define_insn_reservation "cortex_a9_fps" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag")) + (eq_attr "type" "fmov, fconsts, fconstd, ffariths, ffarithd,\ + f_mcr, f_mcrr, f_mrc, f_mrrc, f_flag")) "ca9_issue_vfp_neon + ca9fps") (define_bypass 1 @@ -225,7 +233,7 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") (define_insn_reservation "cortex_a9_fadd" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fadds, faddd, f_cvt")) + (eq_attr "type" "fadds, faddd, f_cvt, f_cvtf2i, f_cvti2f")) "ca9fp_add") (define_insn_reservation "cortex_a9_fcmp" 1 @@ -263,12 +271,12 @@ cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") ;; Division pipeline description. 
(define_insn_reservation "cortex_a9_fdivs" 15 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") (define_insn_reservation "cortex_a9_fdivd" 25 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") ;; Include Neon pipeline description diff --git a/gcc/config/arm/cortex-m4-fpu.md b/gcc/config/arm/cortex-m4-fpu.md index 4ce3f10f0de..2190938b65c 100644 --- a/gcc/config/arm/cortex-m4-fpu.md +++ b/gcc/config/arm/cortex-m4-fpu.md @@ -30,17 +30,17 @@ ;; Integer instructions following VDIV or VSQRT complete out-of-order. (define_insn_reservation "cortex_m4_fdivs" 15 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_m4_ex_v,cortex_m4_v*13") (define_insn_reservation "cortex_m4_vmov_1" 1 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "fcpys,fconsts")) + (eq_attr "type" "fmov,fconsts")) "cortex_m4_ex_v") (define_insn_reservation "cortex_m4_vmov_2" 2 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "f_2_r,r_2_f")) + (eq_attr "type" "f_mrc,f_mrrc,f_mcr,f_mcrr")) "cortex_m4_ex_v*2") (define_insn_reservation "cortex_m4_fmuls" 2 @@ -77,7 +77,7 @@ (define_insn_reservation "cortex_m4_f_cvt" 2 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "cortex_m4_ex_v") (define_insn_reservation "cortex_m4_f_load" 2 diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md index 53bd60cd98f..9ae4cc3143b 100644 --- a/gcc/config/arm/cortex-m4.md +++ b/gcc/config/arm/cortex-m4.md @@ -31,10 +31,18 @@ ;; ALU and multiply is one cycle. (define_insn_reservation "cortex_m4_alu" 1 (and (eq_attr "tune" "cortexm4") - (ior (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,extend,\ - arlo_shift,arlo_shift_reg,\ + (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg") + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn") (ior (eq_attr "mul32" "yes") (eq_attr "mul64" "yes")))) "cortex_m4_ex") diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md index 597774dbd89..7a3ceeb15d7 100644 --- a/gcc/config/arm/cortex-r4.md +++ b/gcc/config/arm/cortex-r4.md @@ -78,7 +78,11 @@ ;; for the purposes of the dual-issue constraints above. 
(define_insn_reservation "cortex_r4_alu" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,mvn_imm,mvn_reg")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_mov" 2 @@ -88,12 +92,17 @@ (define_insn_reservation "cortex_r4_alu_shift" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_alu_shift_reg" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg,\ + mrs,multiple,no_insn")) "cortex_r4_alu_shift_reg") ;; An ALU instruction followed by an ALU instruction with no early dep. diff --git a/gcc/config/arm/cortex-r4f.md b/gcc/config/arm/cortex-r4f.md index 0c0bae0cd74..1bc4249d4d1 100644 --- a/gcc/config/arm/cortex-r4f.md +++ b/gcc/config/arm/cortex-r4f.md @@ -48,7 +48,7 @@ (define_insn_reservation "cortex_r4_fcpys" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "fcpys")) + (eq_attr "type" "fmov")) "cortex_r4_issue_ab") (define_insn_reservation "cortex_r4_ffariths" 2 @@ -68,7 +68,7 @@ (define_insn_reservation "cortex_r4_fdivs" 17 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1") (define_insn_reservation "cortex_r4_floads" 2 @@ -83,12 +83,12 @@ (define_insn_reservation "cortex_r4_mcr" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "r_2_f")) + (eq_attr "type" "f_mcr,f_mcrr")) "cortex_r4_issue_ab") (define_insn_reservation "cortex_r4_mrc" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "f_2_r")) + (eq_attr "type" "f_mrc,f_mrrc")) "cortex_r4_issue_ab") ;; Bypasses for normal (not early) regs. @@ -131,7 +131,7 @@ ;; out of order. Chances are this is not a pipelined operation. 
(define_insn_reservation "cortex_r4_fdivd" 97 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "cortex_r4_single_issue*3") (define_insn_reservation "cortex_r4_ffarithd" 2 @@ -146,7 +146,7 @@ (define_insn_reservation "cortex_r4_f_cvt" 8 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "f_cvt")) + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) "cortex_r4_single_issue*3") (define_insn_reservation "cortex_r4_f_memd" 8 diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md index 9ec92d60dc5..401abd3c0a0 100644 --- a/gcc/config/arm/fa526.md +++ b/gcc/config/arm/fa526.md @@ -62,13 +62,22 @@ ;; ALU operations (define_insn_reservation "526_alu_op" 1 (and (eq_attr "tune" "fa526") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "fa526_core") (define_insn_reservation "526_alu_shift_op" 2 (and (eq_attr "tune" "fa526") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "fa526_core") diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md index e61242886d7..88347bc2d96 100644 --- a/gcc/config/arm/fa606te.md +++ b/gcc/config/arm/fa606te.md @@ -62,10 +62,18 @@ ;; ALU operations (define_insn_reservation "606te_alu_op" 1 (and (eq_attr "tune" "fa606te") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg, - extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) "fa606te_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md index 04d2a5cf33f..e6790a21215 100644 --- a/gcc/config/arm/fa626te.md +++ b/gcc/config/arm/fa626te.md @@ -68,13 +68,22 @@ ;; ALU operations (define_insn_reservation "626te_alu_op" 1 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) "fa626te_core") (define_insn_reservation "626te_alu_shift_op" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "fa626te_core") diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md index 342b9bf5d33..d0a03981eec 100644 --- a/gcc/config/arm/fa726te.md +++ 
b/gcc/config/arm/fa726te.md @@ -86,7 +86,12 @@ ;; Other ALU instructions 2 cycles. (define_insn_reservation "726te_alu_op" 1 (and (eq_attr "tune" "fa726te") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg")) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mrs,multiple,no_insn")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") ;; ALU operations with a shift-by-register operand. @@ -95,12 +100,14 @@ ;; it takes 3 cycles. (define_insn_reservation "726te_alu_shift_op" 3 (and (eq_attr "tune" "fa726te") - (eq_attr "type" "extend,arlo_shift")) + (eq_attr "type" "extend,alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") (define_insn_reservation "726te_alu_shift_reg_op" 3 (and (eq_attr "tune" "fa726te") - (eq_attr "type" "arlo_shift_reg")) + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Multiplication Instructions diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md index 944645b9ead..ffb68570e37 100644 --- a/gcc/config/arm/fmp626.md +++ b/gcc/config/arm/fmp626.md @@ -63,13 +63,19 @@ ;; ALU operations (define_insn_reservation "mp626_alu_op" 1 (and (eq_attr "tune" "fmp626") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ mov_imm,mov_reg,mvn_imm,mvn_reg")) "fmp626_core") (define_insn_reservation "mp626_alu_shift_op" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mov_shift_reg,\ mvn_shift,mvn_shift_reg")) "fmp626_core") diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index d84929f3d1f..c7d7079b9de 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -391,7 +391,7 @@ (define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t") (V8HI "x") (V4SI "t") (V4SF "t")]) -;; Predicates used for setting neon_type +;; Predicates used for setting type for neon instructions (define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") (V4HI "false") (V8HI "false") diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md index f1f0a5c5150..62cdae21e3f 100644 --- a/gcc/config/arm/iwmmxt.md +++ b/gcc/config/arm/iwmmxt.md @@ -155,7 +155,8 @@ (const_int 8) (const_int 4))] (const_int 4))) - (set_attr "type" "*,*,*,load2,store2,wmmx_wmov,wmmx_tmcrr,wmmx_tmrrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set_attr "type" "*,*,*,load2,store2,*,*,*,*,*,f_mcrr,f_mrrc,\ + ffarithd,f_loadd,f_stored") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*") (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")] ) @@ -187,7 +188,8 @@ default: gcc_unreachable (); }" - [(set_attr "type" "*,*,*,*,load1,store1,wmmx_tmcr,wmmx_tmrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,fcpys,f_loads,f_stores") + [(set_attr "type" "*,*,*,*,load1,store1,*,*,*,*,f_mcr,f_mrc,\ + fmov,f_loads,f_stores") (set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*") (set_attr 
"pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*") diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h index cb0aad19c34..232c38d28ff 100644 --- a/gcc/config/arm/linux-eabi.h +++ b/gcc/config/arm/linux-eabi.h @@ -85,7 +85,7 @@ LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) #undef ASAN_CC1_SPEC -#define ASAN_CC1_SPEC "%{fsanitize=*:-funwind-tables}" +#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}" #undef CC1_SPEC #define CC1_SPEC \ @@ -99,7 +99,7 @@ #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h index 488efa4ba15..475e22079fc 100644 --- a/gcc/config/arm/linux-elf.h +++ b/gcc/config/arm/linux-elf.h @@ -44,9 +44,9 @@ #define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION " -p" +/* We do not have any MULTILIB_OPTIONS specified, so there are no + MULTILIB_DEFAULTS. */ #undef MULTILIB_DEFAULTS -#define MULTILIB_DEFAULTS \ - { "marm", "mlittle-endian", "mfloat-abi=hard", "mno-thumb-interwork" } /* Now we define the strings used to build the spec file. */ #undef LIB_SPEC diff --git a/gcc/config/arm/marvell-pj4.md b/gcc/config/arm/marvell-pj4.md index 0e2c443721e..880789600e0 100644 --- a/gcc/config/arm/marvell-pj4.md +++ b/gcc/config/arm/marvell-pj4.md @@ -53,26 +53,42 @@ (define_insn_reservation "pj4_alu" 1 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") (not (eq_attr "conds" "set"))) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu_conds" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") (eq_attr "conds" "set")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_shift" 1 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") (not (eq_attr "conds" "set")) (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_shift_conds" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") (eq_attr "conds" "set") (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") @@ -80,14 +96,20 @@ (define_insn_reservation "pj4_alu_shift" 1 (and (eq_attr "tune" "marvell_pj4") (not (eq_attr "conds" "set")) - (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + 
alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu_shift_conds" 4 (and (eq_attr "tune" "marvell_pj4") (eq_attr "conds" "set") - (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") @@ -171,11 +193,11 @@ (define_insn_reservation "pj4_vfp_divs" 20 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "fdivs")) "pj4_is,nothing*2,vissue,vdiv*18,nothing") + (eq_attr "type" "fdivs, fsqrts")) "pj4_is,nothing*2,vissue,vdiv*18,nothing") (define_insn_reservation "pj4_vfp_divd" 34 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "fdivd")) "pj4_is,nothing*2,vissue,vdiv*32,nothing") + (eq_attr "type" "fdivd, fsqrtd")) "pj4_is,nothing*2,vissue,vdiv*32,nothing") (define_insn_reservation "pj4_vfp_mac" 9 (and (eq_attr "tune" "marvell_pj4") @@ -186,8 +208,9 @@ (define_insn_reservation "pj4_vfp_cpy" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,\ - fcmps,fcmpd,f_cvt")) "pj4_is,nothing*2,vissue,vfast,nothing*2") + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,\ + fcmps,fcmpd,f_cvt,f_cvtf2i,f_cvti2f")) +"pj4_is,nothing*2,vissue,vfast,nothing*2") ;; Enlarge latency, and wish that more nondependent insns are ;; scheduled immediately after VFP load. @@ -201,9 +224,9 @@ (define_insn_reservation "pj4_vfp_to_core" 7 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "f_2_r,f_flag")) "pj4_isb,nothing,nothing,vissue,vfast,nothing*2") + (eq_attr "type" "f_mrc,f_mrrc,f_flag")) "pj4_isb,nothing,nothing,vissue,vfast,nothing*2") (define_insn_reservation "pj4_core_to_vfp" 2 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "r_2_f")) "pj4_isb,pj4_alu1,pj4_w1,vissue,pj4_cp") + (eq_attr "type" "f_mcr,f_mcrr")) "pj4_isb,pj4_alu1,pj4_w1,vissue,pj4_cp") diff --git a/gcc/config/arm/neon-schedgen.ml b/gcc/config/arm/neon-schedgen.ml index 7dacbab2625..b3699563d48 100644 --- a/gcc/config/arm/neon-schedgen.ml +++ b/gcc/config/arm/neon-schedgen.ml @@ -480,7 +480,7 @@ let emit_insn_reservations core = Printf.printf "(define_insn_reservation \"%s_%s\" %d\n" corestring producer latency; Printf.printf " (and (eq_attr \"tune\" \"%s\")\n" tunestring; - Printf.printf " (eq_attr \"neon_type\" \"%s\"))\n" producer; + Printf.printf " (eq_attr \"type\" \"%s\"))\n" producer; let str = match reservation with Mul -> "dp" | Mul_2cycle -> "dp_2" | Mul_4cycle -> "dp_4" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e8d2285fa81..ae83dba5f89 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -20,7 +20,7 @@ ;; Attribute used to permit string comparisons against <VQH_mnem> in -;; neon_type attribute definitions. +;; type attribute definitions. 
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) (define_insn "*neon_mov<mode>" @@ -60,8 +60,8 @@ default: return output_move_double (operands, true, NULL); } } - [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*") - (set_attr "type" "*,f_stored,*,f_loadd,*,*,mov_reg,load2,store2") + [(set_attr "type" "neon_int_1,f_stored,neon_vmov,f_loadd,neon_mrrc,\ + neon_mcr_2_mcrr,mov_reg,load2,store2") (set_attr "length" "4,4,4,4,4,4,8,8,8") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") @@ -104,9 +104,8 @@ default: return output_move_quad (operands); } } - [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\ - neon_mrrc,neon_mcr_2_mcrr,*,*,*") - (set_attr "type" "*,*,*,*,*,*,mov_reg,load4,store4") + [(set_attr "type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\ + neon_mrrc,neon_mcr_2_mcrr,mov_reg,load4,store4") (set_attr "length" "4,8,4,8,8,8,16,8,16") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") @@ -150,7 +149,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_ldm_2") + [(set_attr "type" "neon_int_1,neon_stm_2,neon_ldm_2") (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) (define_split @@ -242,7 +241,7 @@ [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" { /* This pattern is not permitted to fail during expansion: if both arguments are non-registers (e.g. memory := constant, which can be created by the @@ -256,35 +255,35 @@ [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vst1.<V_sz_elem>\t{%P1}, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign<mode>_neon_load" [(set (match_operand:VDX 0 "s_register_operand" "=w") (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" " Um")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vld1.<V_sz_elem>\t{%P0}, %A1" - [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + [(set_attr "type" "neon_vld1_1_2_regs")]) (define_insn "*movmisalign<mode>_neon_store" [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vst1.<V_sz_elem>\t{%q1}, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign<mode>_neon_load" [(set (match_operand:VQX 0 "s_register_operand" "=w") (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" " Um")] UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN" + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" "vld1.<V_sz_elem>\t{%q0}, %A1" - [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + [(set_attr "type" "neon_vld1_1_2_regs")]) (define_insn "vec_set<mode>_internal" [(set (match_operand:VD 0 
"s_register_operand" "=w,w") @@ -305,7 +304,7 @@ else return "vmov.<V_sz_elem>\t%P0[%c2], %1"; } - [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")]) + [(set_attr "type" "neon_vld1_vld2_lane,neon_mcr")]) (define_insn "vec_set<mode>_internal" [(set (match_operand:VQ 0 "s_register_operand" "=w,w") @@ -333,7 +332,7 @@ else return "vmov.<V_sz_elem>\t%P0[%c2], %1"; } - [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")] + [(set_attr "type" "neon_vld1_vld2_lane,neon_mcr")] ) (define_insn "vec_setv2di_internal" @@ -355,7 +354,7 @@ else return "vmov\t%P0, %Q1, %R1"; } - [(set_attr "neon_type" "neon_vld1_1_2_regs,neon_mcr_2_mcrr")] + [(set_attr "type" "neon_vld1_1_2_regs,neon_mcr_2_mcrr")] ) (define_expand "vec_set<mode>" @@ -389,7 +388,7 @@ else return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")] + [(set_attr "type" "neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extract<mode>" @@ -415,7 +414,7 @@ else return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")] + [(set_attr "type" "neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extractv2di" @@ -434,7 +433,7 @@ else return "vmov\t%Q0, %R0, %P1 @ v2di"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_int_1")] + [(set_attr "type" "neon_vst1_vst2_lane,neon_int_1")] ) (define_expand "vec_init<mode>" @@ -457,7 +456,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -484,7 +483,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*") + [(set_attr "type" "neon_int_1,*,*,neon_int_1,*,*,*") (set_attr "conds" "*,clob,clob,*,clob,clob,clob") (set_attr "length" "*,8,8,*,8,8,8") (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")] @@ -496,7 +495,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -521,7 +520,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2") + [(set_attr "type" "neon_int_2,*,*,*,neon_int_2") (set_attr "conds" "*,clob,clob,clob,*") (set_attr "length" "*,8,8,8,*") (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")] @@ -533,7 +532,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -555,7 +554,7 @@ (match_operand:VDQ 1 "s_register_operand" "0")))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") @@ -577,7 +576,7 @@ (match_operand:VDQ 3 "s_register_operand" "w"))))] 
"TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") @@ -604,7 +603,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -617,7 +616,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA" "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -630,7 +629,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -643,7 +642,7 @@ (match_operand:VCVTF 3 "register_operand" "0")))] "TARGET_NEON && TARGET_FMA" "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") (const_string "neon_fp_vmla_qqq")))] @@ -656,7 +655,7 @@ NEON_VRINT))] "TARGET_NEON && TARGET_FPU_ARMV8" "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -676,7 +675,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; The concrete forms of the Neon immediate-logic instructions are vbic and @@ -698,7 +697,7 @@ default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "orn<mode>3_neon" @@ -707,7 +706,7 @@ (match_operand:VDQ 1 "s_register_operand" "w")))] "TARGET_NEON" "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; TODO: investigate whether we should disable @@ -745,7 +744,7 @@ DONE; } }" - [(set_attr "neon_type" "neon_int_1,*,*,*") + [(set_attr "type" "neon_int_1,*,*,*") (set_attr "length" "*,16,8,8") (set_attr "arch" "any,a,t2,t2")] ) @@ -756,7 +755,7 @@ (match_operand:VDQ 1 "s_register_operand" "w")))] "TARGET_NEON" "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; Compare to *anddi_notdi_di. 
@@ -769,7 +768,7 @@ vbic\t%P0, %P1, %P2 # #" - [(set_attr "neon_type" "neon_int_1,*,*") + [(set_attr "type" "neon_int_1,*,*") (set_attr "length" "*,8,8")] ) @@ -779,7 +778,7 @@ (match_operand:VDQ 2 "s_register_operand" "w")))] "TARGET_NEON" "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "one_cmpl<mode>2" @@ -787,7 +786,7 @@ (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] "TARGET_NEON" "vmvn\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "abs<mode>2" @@ -795,7 +794,7 @@ (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] "TARGET_NEON" "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -808,7 +807,7 @@ (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] "TARGET_NEON" "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -861,7 +860,7 @@ (match_operand:VDQIW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "*umax<mode>3_neon" @@ -870,7 +869,7 @@ (match_operand:VDQIW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "*smin<mode>3_neon" @@ -879,7 +878,7 @@ (match_operand:VDQW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -891,7 +890,7 @@ (match_operand:VDQW 2 "s_register_operand" "w")))] "TARGET_NEON" "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -917,7 +916,7 @@ default: gcc_unreachable (); } } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -933,7 +932,7 @@ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), false); } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -949,7 +948,7 @@ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), false); } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -967,7 +966,7 @@ UNSPEC_ASHIFT_SIGNED))] "TARGET_NEON" "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -983,7 +982,7 @@ UNSPEC_ASHIFT_UNSIGNED))] "TARGET_NEON" "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -1038,7 +1037,7 @@ "@ 
vld1.32\t{%P0[0]}, %A1 vmov.32\t%P0[0], %1" - [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")] + [(set_attr "type" "neon_vld1_vld2_lane,neon_mcr")] ) (define_insn "ashldi3_neon_noclobber" @@ -1051,7 +1050,7 @@ "@ vshl.u64\t%P0, %P1, %2 vshl.u64\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_vshl_ddd,neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd,neon_vshl_ddd")] ) (define_insn_and_split "ashldi3_neon" @@ -1113,7 +1112,7 @@ UNSPEC_ASHIFT_SIGNED))] "TARGET_NEON && reload_completed" "vshl.s64\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) ; The shift amount needs to be negated for right-shifts @@ -1124,7 +1123,7 @@ UNSPEC_ASHIFT_UNSIGNED))] "TARGET_NEON && reload_completed" "vshl.u64\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) (define_insn "ashrdi3_neon_imm_noclobber" @@ -1134,7 +1133,7 @@ "TARGET_NEON && reload_completed && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" "vshr.s64\t%P0, %P1, %2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) (define_insn "lshrdi3_neon_imm_noclobber" @@ -1144,7 +1143,7 @@ "TARGET_NEON && reload_completed && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" "vshr.u64\t%P0, %P1, %2" - [(set_attr "neon_type" "neon_vshl_ddd")] + [(set_attr "type" "neon_vshl_ddd")] ) ;; ashrdi3_neon @@ -1215,7 +1214,7 @@ (match_operand:<V_widen> 2 "s_register_operand" "w")))] "TARGET_NEON" "vaddw.<V_s_elem>\t%q0, %q2, %P1" - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) (define_insn "widen_usum<mode>3" @@ -1225,7 +1224,7 @@ (match_operand:<V_widen> 2 "s_register_operand" "w")))] "TARGET_NEON" "vaddw.<V_u_elem>\t%q0, %q2, %P1" - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit @@ -1309,7 +1308,7 @@ "TARGET_NEON" "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "<VQH_mnem>") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr "vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1324,7 +1323,7 @@ "TARGET_NEON && flag_unsafe_math_optimizations" "<VQH_mnem>.f32\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "<VQH_mnem>") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr "vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1341,7 +1340,7 @@ "TARGET_NEON" "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "<VQH_mnem>") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr "vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1362,7 +1361,7 @@ "TARGET_NEON" "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "<VQH_mnem>") - (set (attr "neon_type") + (set (attr "type") (if_then_else (eq_attr "vqh_mnem" "vadd") (const_string "neon_int_1") (const_string "neon_int_5")))] ) @@ -1423,7 +1422,7 @@ UNSPEC_VPADD))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vadd.i64\t%e0, %e1, %f1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) ;; NEON does not distinguish between signed and unsigned addition except on @@ -1547,7 +1546,7 @@ "TARGET_NEON" "vpadd.<V_if_elem>\t%P0, %P1, %P2" ;; Assume this schedules like vadd. 
- [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -1563,7 +1562,7 @@ "TARGET_NEON" "vpmin.<V_s_elem>\t%P0, %P1, %P2" ;; Assume this schedules like vmin. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -1577,7 +1576,7 @@ "TARGET_NEON" "vpmax.<V_s_elem>\t%P0, %P1, %P2" ;; Assume this schedules like vmax. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -1591,7 +1590,7 @@ "TARGET_NEON" "vpmin.<V_u_elem>\t%P0, %P1, %P2" ;; Assume this schedules like umin. - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vpumax<mode>" @@ -1602,7 +1601,7 @@ "TARGET_NEON" "vpmax.<V_u_elem>\t%P0, %P1, %P2" ;; Assume this schedules like umax. - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) ;; Saturating arithmetic @@ -1619,7 +1618,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqadd.<V_s_elem>\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "*us_add<mode>_neon" @@ -1628,7 +1627,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqadd.<V_u_elem>\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "*ss_sub<mode>_neon" @@ -1637,7 +1636,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqsub.<V_s_elem>\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "*us_sub<mode>_neon" @@ -1646,7 +1645,7 @@ (match_operand:VD 2 "s_register_operand" "w")))] "TARGET_NEON" "vqsub.<V_u_elem>\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) ;; Conditional instructions. These are comparisons with conditional moves for @@ -1671,6 +1670,7 @@ ? 3 : 1; rtx magic_rtx = GEN_INT (magic_word); int inverse = 0; + int use_zero_form = 0; int swap_bsl_operands = 0; rtx mask = gen_reg_rtx (<V_cmp_result>mode); rtx tmp = gen_reg_rtx (<V_cmp_result>mode); @@ -1681,12 +1681,16 @@ switch (GET_CODE (operands[3])) { case GE: + case GT: case LE: + case LT: case EQ: - if (!REG_P (operands[5]) - && (operands[5] != CONST0_RTX (<MODE>mode))) - operands[5] = force_reg (<MODE>mode, operands[5]); - break; + if (operands[5] == CONST0_RTX (<MODE>mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ default: if (!REG_P (operands[5])) operands[5] = force_reg (<MODE>mode, operands[5]); @@ -1737,7 +1741,26 @@ a GT b -> a GT b a LE b -> b GE a a LT b -> b GT a - a EQ b -> a EQ b */ + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_neon_vclt<mode>; + break; + case LE: + base_comparison = gen_neon_vcle<mode>; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. 
*/ + break; + } + } if (!inverse) emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); @@ -1914,7 +1937,7 @@ UNSPEC_VADD))] "TARGET_NEON" "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -1934,7 +1957,7 @@ UNSPEC_VADDL))] "TARGET_NEON" "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2" - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) (define_insn "neon_vaddw<mode>" @@ -1945,7 +1968,7 @@ UNSPEC_VADDW))] "TARGET_NEON" "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2" - [(set_attr "neon_type" "neon_int_2")] + [(set_attr "type" "neon_int_2")] ) ; vhadd and vrhadd. @@ -1958,7 +1981,7 @@ UNSPEC_VHADD))] "TARGET_NEON" "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vqadd<mode>" @@ -1969,7 +1992,7 @@ UNSPEC_VQADD))] "TARGET_NEON" "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vaddhn<mode>" @@ -1980,7 +2003,7 @@ UNSPEC_VADDHN))] "TARGET_NEON" "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) ;; We cannot replace this unspec with mul<mode>3 because of the odd @@ -1993,7 +2016,7 @@ UNSPEC_VMUL))] "TARGET_NEON" "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2061,7 +2084,7 @@ UNSPEC_VMLA))] "TARGET_NEON" "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") @@ -2085,7 +2108,7 @@ UNSPEC_VMLAL))] "TARGET_NEON" "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2118,7 +2141,7 @@ UNSPEC_VMLS))] "TARGET_NEON" "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vmla_ddd") @@ -2143,7 +2166,7 @@ UNSPEC_VMLSL))] "TARGET_NEON" "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2157,7 +2180,7 @@ UNSPEC_VQDMULH))] "TARGET_NEON" "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") @@ -2176,7 +2199,7 @@ UNSPEC_VQDMLAL))] "TARGET_NEON" "vqdmlal.<V_s_elem>\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string 
"neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2191,7 +2214,7 @@ UNSPEC_VQDMLSL))] "TARGET_NEON" "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -2205,7 +2228,7 @@ UNSPEC_VMULL))] "TARGET_NEON" "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -2219,7 +2242,7 @@ UNSPEC_VQDMULL))] "TARGET_NEON" "vqdmull.<V_s_elem>\t%q0, %P1, %P2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -2249,7 +2272,7 @@ UNSPEC_VSUB))] "TARGET_NEON" "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2265,7 +2288,7 @@ UNSPEC_VSUBL))] "TARGET_NEON" "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2" - [(set_attr "neon_type" "neon_int_2")] + [(set_attr "type" "neon_int_2")] ) (define_insn "neon_vsubw<mode>" @@ -2276,7 +2299,7 @@ UNSPEC_VSUBW))] "TARGET_NEON" "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2" - [(set_attr "neon_type" "neon_int_2")] + [(set_attr "type" "neon_int_2")] ) (define_insn "neon_vqsub<mode>" @@ -2287,7 +2310,7 @@ UNSPEC_VQSUB))] "TARGET_NEON" "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vhsub<mode>" @@ -2298,7 +2321,7 @@ UNSPEC_VHSUB))] "TARGET_NEON" "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vsubhn<mode>" @@ -2309,7 +2332,7 @@ UNSPEC_VSUBHN))] "TARGET_NEON" "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vceq<mode>" @@ -2323,7 +2346,7 @@ "@ vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2342,7 +2365,7 @@ "@ vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2359,7 +2382,7 @@ UNSPEC_VCGEU))] "TARGET_NEON" "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vcgt<mode>" @@ -2373,7 +2396,7 @@ "@ vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2390,7 +2413,7 @@ 
UNSPEC_VCGTU))] "TARGET_NEON" "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) ;; VCLE and VCLT only support comparisons with immediate zero (register @@ -2405,7 +2428,7 @@ UNSPEC_VCLE))] "TARGET_NEON" "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2422,7 +2445,7 @@ UNSPEC_VCLT))] "TARGET_NEON" "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2438,7 +2461,7 @@ UNSPEC_VCAGE))] "TARGET_NEON" "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2452,7 +2475,7 @@ UNSPEC_VCAGT))] "TARGET_NEON" "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2466,7 +2489,7 @@ UNSPEC_VTST))] "TARGET_NEON" "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "neon_type" "neon_int_4")] + [(set_attr "type" "neon_int_4")] ) (define_insn "neon_vabd<mode>" @@ -2477,7 +2500,7 @@ UNSPEC_VABD))] "TARGET_NEON" "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2493,7 +2516,7 @@ UNSPEC_VABDL))] "TARGET_NEON" "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2" - [(set_attr "neon_type" "neon_int_5")] + [(set_attr "type" "neon_int_5")] ) (define_insn "neon_vaba<mode>" @@ -2505,7 +2528,7 @@ (match_operand:VDQIW 1 "s_register_operand" "0")))] "TARGET_NEON" "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_vaba") (const_string "neon_vaba_qqq")))] ) @@ -2519,7 +2542,7 @@ (match_operand:<V_widen> 1 "s_register_operand" "0")))] "TARGET_NEON" "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3" - [(set_attr "neon_type" "neon_vaba")] + [(set_attr "type" "neon_vaba")] ) (define_insn "neon_vmax<mode>" @@ -2530,7 +2553,7 @@ UNSPEC_VMAX))] "TARGET_NEON" "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2546,7 +2569,7 @@ UNSPEC_VMIN))] "TARGET_NEON" "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -2574,7 +2597,7 @@ "TARGET_NEON" "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1" ;; Assume this schedules like vaddl. - [(set_attr "neon_type" "neon_int_3")] + [(set_attr "type" "neon_int_3")] ) (define_insn "neon_vpadal<mode>" @@ -2586,7 +2609,7 @@ "TARGET_NEON" "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2" ;; Assume this schedules like vpadd. 
- [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "neon_vpmax<mode>" @@ -2598,7 +2621,7 @@ "TARGET_NEON" "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" ;; Assume this schedules like vmax. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -2613,7 +2636,7 @@ "TARGET_NEON" "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" ;; Assume this schedules like vmin. - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_int_5")))] @@ -2627,7 +2650,7 @@ UNSPEC_VRECPS))] "TARGET_NEON" "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vrecps_vrsqrts_ddd") (const_string "neon_fp_vrecps_vrsqrts_qqq")))] @@ -2641,7 +2664,7 @@ UNSPEC_VRSQRTS))] "TARGET_NEON" "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vrecps_vrsqrts_ddd") (const_string "neon_fp_vrecps_vrsqrts_qqq")))] @@ -2664,7 +2687,7 @@ UNSPEC_VQABS))] "TARGET_NEON" "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_vqneg_vqabs")] + [(set_attr "type" "neon_vqneg_vqabs")] ) (define_expand "neon_vneg<mode>" @@ -2684,7 +2707,7 @@ UNSPEC_VQNEG))] "TARGET_NEON" "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_vqneg_vqabs")] + [(set_attr "type" "neon_vqneg_vqabs")] ) (define_insn "neon_vcls<mode>" @@ -2694,7 +2717,7 @@ UNSPEC_VCLS))] "TARGET_NEON" "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_insn "clz<mode>2" @@ -2702,7 +2725,7 @@ (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_expand "neon_vclz<mode>" @@ -2720,7 +2743,7 @@ (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] "TARGET_NEON" "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_expand "neon_vcnt<mode>" @@ -2740,7 +2763,7 @@ UNSPEC_VRECPE))] "TARGET_NEON" "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2753,7 +2776,7 @@ UNSPEC_VRSQRTE))] "TARGET_NEON" "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -2785,7 +2808,7 @@ } return "vmov.s<V_sz_elem>\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vget_lane<mode>_zext_internal" @@ -2804,7 +2827,7 @@ } return "vmov.u<V_sz_elem>\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vget_lane<mode>_sext_internal" @@ -2831,7 +2854,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vget_lane<mode>_zext_internal" @@ -2858,7 +2881,7 @@ return ""; } 
- [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_expand "neon_vget_lane<mode>" @@ -2991,7 +3014,7 @@ "TARGET_NEON" "vdup.<V_sz_elem>\t%<V_reg>0, %1" ;; Assume this schedules like vmov. - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vdup_n<mode>" @@ -3002,7 +3025,7 @@ vdup.<V_sz_elem>\t%<V_reg>0, %1 vdup.<V_sz_elem>\t%<V_reg>0, %y1" ;; Assume this schedules like vmov. - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_expand "neon_vdup_ndi" @@ -3023,7 +3046,7 @@ vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 vmov\t%e0, %P1\;vmov\t%f0, %P1" [(set_attr "length" "8") - (set_attr "neon_type" "neon_bp_simple")] + (set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vdup_lane<mode>_internal" @@ -3046,7 +3069,7 @@ return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; } ;; Assume this schedules like vmov. - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_expand "neon_vdup_lane<mode>" @@ -3101,7 +3124,7 @@ (set (match_dup 1) (match_dup 0))] "TARGET_NEON && reload_completed" "vswp\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") (const_string "neon_bp_2cycle")))] @@ -3155,7 +3178,7 @@ (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] "TARGET_NEON && !flag_rounding_math" "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3166,7 +3189,7 @@ (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] "TARGET_NEON && !flag_rounding_math" "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3177,7 +3200,7 @@ (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] "TARGET_NEON" "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3188,7 +3211,7 @@ (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] "TARGET_NEON" "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3201,7 +3224,7 @@ UNSPEC_VCVT))] "TARGET_NEON" "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3214,7 +3237,7 @@ UNSPEC_VCVT))] "TARGET_NEON" "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3226,7 +3249,7 @@ UNSPEC_VCVT))] "TARGET_NEON && TARGET_FP16" "vcvt.f32.f16\t%q0, %P1" - [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")] + [(set_attr "type" "neon_fp_vadd_ddd_vabs_dd")] ) (define_insn "neon_vcvtv4hfv4sf" @@ -3235,7 +3258,7 @@ UNSPEC_VCVT))] "TARGET_NEON && TARGET_FP16" "vcvt.f16.f32\t%P0, %q1" - [(set_attr "neon_type" 
"neon_fp_vadd_ddd_vabs_dd")] + [(set_attr "type" "neon_fp_vadd_ddd_vabs_dd")] ) (define_insn "neon_vcvt_n<mode>" @@ -3249,7 +3272,7 @@ neon_const_bounds (operands[2], 1, 33); return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3266,7 +3289,7 @@ neon_const_bounds (operands[2], 1, 33); return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_fp_vadd_ddd_vabs_dd") (const_string "neon_fp_vadd_qqq_vabs_qq")))] @@ -3279,7 +3302,7 @@ UNSPEC_VMOVN))] "TARGET_NEON" "vmovn.<V_if_elem>\t%P0, %q1" - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vqmovn<mode>" @@ -3289,7 +3312,7 @@ UNSPEC_VQMOVN))] "TARGET_NEON" "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1" - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vqmovun<mode>" @@ -3299,7 +3322,7 @@ UNSPEC_VQMOVUN))] "TARGET_NEON" "vqmovun.<V_s_elem>\t%P0, %q1" - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vmovl<mode>" @@ -3309,7 +3332,7 @@ UNSPEC_VMOVL))] "TARGET_NEON" "vmovl.%T2%#<V_sz_elem>\t%q0, %P1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vmul_lane<mode>" @@ -3325,7 +3348,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vmul_ddd") (if_then_else (match_test "<Scalar_mul_8_16>") @@ -3346,7 +3369,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode)); return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vmul_qqd") (if_then_else (match_test "<Scalar_mul_8_16>") @@ -3367,7 +3390,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -3386,7 +3409,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -3405,7 +3428,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") (const_string "neon_mul_qqd_32_scalar")))] @@ -3424,7 +3447,7 @@ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") 
(const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] @@ -3444,7 +3467,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vmla_ddd_scalar") (if_then_else (match_test "<Scalar_mul_8_16>") @@ -3466,7 +3489,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vmla_qqq_scalar") (if_then_else (match_test "<Scalar_mul_8_16>") @@ -3488,7 +3511,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3508,7 +3531,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3528,7 +3551,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vmla_ddd_scalar") (if_then_else (match_test "<Scalar_mul_8_16>") @@ -3550,7 +3573,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") (const_string "neon_fp_vmla_qqq_scalar") (if_then_else (match_test "<Scalar_mul_8_16>") @@ -3572,7 +3595,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3592,7 +3615,7 @@ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Scalar_mul_8_16>") (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] @@ -3820,7 +3843,7 @@ neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") (const_string "neon_bp_2cycle")))] @@ -3833,7 +3856,7 @@ UNSPEC_VREV64))] "TARGET_NEON" "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vrev32<mode>" @@ -3843,7 +3866,7 @@ UNSPEC_VREV32))] "TARGET_NEON" "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" - 
[(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) (define_insn "neon_vrev16<mode>" @@ -3853,7 +3876,7 @@ UNSPEC_VREV16))] "TARGET_NEON" "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "type" "neon_bp_simple")] ) ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register @@ -3875,7 +3898,7 @@ vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1" - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "type" "neon_int_1")] ) (define_expand "neon_vbsl<mode>" @@ -3898,7 +3921,7 @@ UNSPEC_VSHL))] "TARGET_NEON" "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_vshl_ddd") (const_string "neon_shift_3")))] @@ -3912,7 +3935,7 @@ UNSPEC_VQSHL))] "TARGET_NEON" "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_shift_2") (const_string "neon_vqshl_vrshl_vqrshl_qqq")))] @@ -3929,7 +3952,7 @@ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1); return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vshrn_n<mode>" @@ -3943,7 +3966,7 @@ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vqshrn_n<mode>" @@ -3957,7 +3980,7 @@ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vqshrun_n<mode>" @@ -3971,7 +3994,7 @@ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vshl_n<mode>" @@ -3985,7 +4008,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vqshl_n<mode>" @@ -3999,7 +4022,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vqshlu_n<mode>" @@ -4013,7 +4036,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; } - [(set_attr "neon_type" "neon_shift_2")] + [(set_attr "type" "neon_shift_2")] ) (define_insn "neon_vshll_n<mode>" @@ -4028,7 +4051,7 @@ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1); return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vsra_n<mode>" @@ -4043,7 +4066,7 @@ neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; } - [(set_attr "neon_type" "neon_vsra_vrsra")] + [(set_attr "type" "neon_vsra_vrsra")] ) (define_insn 
"neon_vsri_n<mode>" @@ -4057,7 +4080,7 @@ neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_shift_1") (const_string "neon_shift_3")))] @@ -4074,7 +4097,7 @@ neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_shift_1") (const_string "neon_shift_3")))] @@ -4087,7 +4110,7 @@ UNSPEC_VTBL))] "TARGET_NEON" "vtbl.8\t%P0, {%P1}, %P2" - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbl2v8qi" @@ -4108,7 +4131,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbl3v8qi" @@ -4130,7 +4153,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) (define_insn "neon_vtbl4v8qi" @@ -4153,7 +4176,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) ;; These three are used by the vec_perm infrastructure for V16QImode. @@ -4241,7 +4264,7 @@ UNSPEC_VTBX))] "TARGET_NEON" "vtbx.8\t%P0, {%P2}, %P3" - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbx2v8qi" @@ -4263,7 +4286,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_2cycle")] + [(set_attr "type" "neon_bp_2cycle")] ) (define_insn "neon_vtbx3v8qi" @@ -4286,7 +4309,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) (define_insn "neon_vtbx4v8qi" @@ -4310,7 +4333,7 @@ return ""; } - [(set_attr "neon_type" "neon_bp_3cycle")] + [(set_attr "type" "neon_bp_3cycle")] ) (define_expand "neon_vtrn<mode>_internal" @@ -4336,7 +4359,7 @@ UNSPEC_VTRN2))] "TARGET_NEON" "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") (const_string "neon_bp_3cycle")))] @@ -4376,7 +4399,7 @@ UNSPEC_VZIP2))] "TARGET_NEON" "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") (const_string "neon_bp_3cycle")))] @@ -4416,7 +4439,7 @@ UNSPEC_VUZP2))] "TARGET_NEON" "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") (const_string "neon_bp_3cycle")))] @@ -4535,7 +4558,7 @@ UNSPEC_VLD1))] "TARGET_NEON" "vld1.<V_sz_elem>\t%h0, %A1" - [(set_attr "neon_type" "neon_vld1_1_2_regs")] + [(set_attr "type" "neon_vld1_1_2_regs")] ) (define_insn "neon_vld1_lane<mode>" @@ -4555,7 +4578,7 @@ else return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld1_vld2_lane")))] @@ -4586,26 +4609,27 @@ else return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld1_vld2_lane")))] ) (define_insn "neon_vld1_dup<mode>" - [(set (match_operand:VDX 0 "s_register_operand" "=w") - (vec_duplicate:VDX 
(match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] + [(set (match_operand:VD 0 "s_register_operand" "=w") + (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] "TARGET_NEON" -{ - if (GET_MODE_NUNITS (<MODE>mode) > 1) - return "vld1.<V_sz_elem>\t{%P0[]}, %A1"; - else - return "vld1.<V_sz_elem>\t%h0, %A1"; -} - [(set (attr "neon_type") - (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) - (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") - (const_string "neon_vld1_1_2_regs")))] + "vld1.<V_sz_elem>\t{%P0[]}, %A1" + [(set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] +) + +;; Special case for DImode. Treat it exactly like a simple load. +(define_expand "neon_vld1_dupdi" + [(set (match_operand:DI 0 "s_register_operand" "") + (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")] + UNSPEC_VLD1))] + "TARGET_NEON" + "" ) (define_insn "neon_vld1_dup<mode>" @@ -4615,7 +4639,7 @@ { return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; } - [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] + [(set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] ) (define_insn_and_split "neon_vld1_dupv2di" @@ -4632,7 +4656,7 @@ DONE; } [(set_attr "length" "8") - (set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] + (set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] ) (define_expand "vec_store_lanes<mode><mode>" @@ -4647,7 +4671,7 @@ UNSPEC_VST1))] "TARGET_NEON" "vst1.<V_sz_elem>\t%h1, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "neon_vst1_lane<mode>" [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") @@ -4666,7 +4690,7 @@ else return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1)) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string "neon_vst1_vst2_lane")))]) @@ -4696,7 +4720,7 @@ else return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; } - [(set_attr "neon_type" "neon_vst1_vst2_lane")] + [(set_attr "type" "neon_vst1_vst2_lane")] ) (define_expand "vec_load_lanesti<mode>" @@ -4718,7 +4742,7 @@ else return "vld2.<V_sz_elem>\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))] @@ -4738,7 +4762,7 @@ UNSPEC_VLD2))] "TARGET_NEON" "vld2.<V_sz_elem>\t%h0, %A1" - [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) + [(set_attr "type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) (define_insn "neon_vld2_lane<mode>" [(set (match_operand:TI 0 "s_register_operand" "=w") @@ -4762,7 +4786,7 @@ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } - [(set_attr "neon_type" "neon_vld1_vld2_lane")] + [(set_attr "type" "neon_vld1_vld2_lane")] ) (define_insn "neon_vld2_lane<mode>" @@ -4792,7 +4816,7 @@ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } - [(set_attr "neon_type" "neon_vld1_vld2_lane")] + [(set_attr "type" "neon_vld1_vld2_lane")] ) (define_insn "neon_vld2_dup<mode>" @@ -4807,7 +4831,7 @@ else return "vld1.<V_sz_elem>\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") (const_string "neon_vld1_1_2_regs")))] @@ -4832,7 +4856,7 @@ else return 
"vst2.<V_sz_elem>\t%h1, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string "neon_vst1_1_2_regs_vst2_2_regs")))] @@ -4852,7 +4876,7 @@ UNSPEC_VST2))] "TARGET_NEON" "vst2.<V_sz_elem>\t%h1, %A0" - [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] + [(set_attr "type" "neon_vst1_1_2_regs_vst2_2_regs")] ) (define_insn "neon_vst2_lane<mode>" @@ -4877,7 +4901,7 @@ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst1_vst2_lane")] + [(set_attr "type" "neon_vst1_vst2_lane")] ) (define_insn "neon_vst2_lane<mode>" @@ -4907,7 +4931,7 @@ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst1_vst2_lane")] + [(set_attr "type" "neon_vst1_vst2_lane")] ) (define_expand "vec_load_lanesei<mode>" @@ -4929,7 +4953,7 @@ else return "vld3.<V_sz_elem>\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld3_vld4")))] @@ -4976,7 +5000,7 @@ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld3qb<mode>" @@ -4996,7 +5020,7 @@ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld3_lane<mode>" @@ -5023,7 +5047,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld3_lane<mode>" @@ -5055,7 +5079,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld3_dup<mode>" @@ -5079,7 +5103,7 @@ else return "vld1.<V_sz_elem>\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) (const_string "neon_vld3_vld4_all_lanes") (const_string "neon_vld1_1_2_regs")))]) @@ -5103,7 +5127,7 @@ else return "vst3.<V_sz_elem>\t%h1, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string "neon_vst2_4_regs_vst3_vst4")))]) @@ -5149,7 +5173,7 @@ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst3qb<mode>" @@ -5168,7 +5192,7 @@ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst3_lane<mode>" @@ -5195,7 +5219,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vst3_vst4_lane")] + [(set_attr "type" "neon_vst3_vst4_lane")] ) (define_insn "neon_vst3_lane<mode>" @@ -5227,7 +5251,7 @@ ops); return ""; } -[(set_attr "neon_type" "neon_vst3_vst4_lane")]) +[(set_attr "type" "neon_vst3_vst4_lane")]) (define_expand "vec_load_lanesoi<mode>" [(set (match_operand:OI 0 "s_register_operand") @@ -5248,7 +5272,7 @@ else return "vld4.<V_sz_elem>\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq 
(const_string "<V_sz_elem>") (const_string "64")) (const_string "neon_vld1_1_2_regs") (const_string "neon_vld3_vld4")))] @@ -5296,7 +5320,7 @@ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld4qb<mode>" @@ -5317,7 +5341,7 @@ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4")] + [(set_attr "type" "neon_vld3_vld4")] ) (define_insn "neon_vld4_lane<mode>" @@ -5345,7 +5369,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld4_lane<mode>" @@ -5378,7 +5402,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vld3_vld4_lane")] + [(set_attr "type" "neon_vld3_vld4_lane")] ) (define_insn "neon_vld4_dup<mode>" @@ -5404,7 +5428,7 @@ else return "vld1.<V_sz_elem>\t%h0, %A1"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) (const_string "neon_vld3_vld4_all_lanes") (const_string "neon_vld1_1_2_regs")))] @@ -5429,7 +5453,7 @@ else return "vst4.<V_sz_elem>\t%h1, %A0"; } - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) (const_string "neon_vst1_1_2_regs_vst2_2_regs") (const_string "neon_vst2_4_regs_vst3_vst4")))] @@ -5477,7 +5501,7 @@ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst4qb<mode>" @@ -5497,7 +5521,7 @@ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); return ""; } - [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + [(set_attr "type" "neon_vst2_4_regs_vst3_vst4")] ) (define_insn "neon_vst4_lane<mode>" @@ -5525,7 +5549,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vst3_vst4_lane")] + [(set_attr "type" "neon_vst3_vst4_lane")] ) (define_insn "neon_vst4_lane<mode>" @@ -5558,7 +5582,7 @@ ops); return ""; } - [(set_attr "neon_type" "neon_vst3_vst4_lane")] + [(set_attr "type" "neon_vst3_vst4_lane")] ) (define_expand "neon_vand<mode>" @@ -5623,7 +5647,7 @@ (match_operand:VU 2 "vect_par_constant_low" ""))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovl.<US><V_sz_elem> %q0, %e1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_insn "neon_vec_unpack<US>_hi_<mode>" @@ -5633,7 +5657,7 @@ (match_operand:VU 2 "vect_par_constant_high" ""))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovl.<US><V_sz_elem> %q0, %f1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_unpack<US>_hi_<mode>" @@ -5683,7 +5707,7 @@ (match_dup 2)))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmull.<US><V_sz_elem> %q0, %e1, %e3" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_<US>mult_lo_<mode>" @@ -5717,7 +5741,7 @@ (match_dup 2)))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmull.<US><V_sz_elem> %q0, %f1, %f3" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_<US>mult_hi_<mode>" @@ -5750,7 +5774,7 @@ { return "vshll.<US><V_sz_elem> %q0, %P1, %2"; } - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_<US>shiftl_lo_<mode>" @@ -5786,7 +5810,7 @@ (SE:<V_widen> 
(match_operand:VDI 1 "register_operand" "w")))] "TARGET_NEON" "vmovl.<US><V_sz_elem> %q0, %P1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_unpack<US>_lo_<mode>" @@ -5823,7 +5847,7 @@ (match_operand:VDI 2 "register_operand" "w"))))] "TARGET_NEON" "vmull.<US><V_sz_elem> %q0, %P1, %P2" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_widen_<US>mult_hi_<mode>" @@ -5897,7 +5921,7 @@ (match_operand:VN 2 "register_operand" "w"))))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2" - [(set_attr "neon_type" "neon_shift_1") + [(set_attr "type" "neon_shift_1") (set_attr "length" "8")] ) @@ -5907,7 +5931,7 @@ (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vmovn.i<V_sz_elem>\t%P0, %q1" - [(set_attr "neon_type" "neon_shift_1")] + [(set_attr "type" "neon_shift_1")] ) (define_expand "vec_pack_trunc_<mode>" @@ -5930,7 +5954,7 @@ (match_operand:VDQ 2 "s_register_operand" "w"))))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_fp_vadd_ddd_vabs_dd") @@ -5945,7 +5969,7 @@ UNSPEC_VSUB)))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "neon_type") + [(set (attr "type") (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_fp_vadd_ddd_vabs_dd") diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 246f0f5b540..20e79ef2680 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -78,6 +78,11 @@ $(srcdir)/config/arm/arm-tables.opt: $(srcdir)/config/arm/genopt.sh \ $(SHELL) $(srcdir)/config/arm/genopt.sh $(srcdir)/config/arm > \ $(srcdir)/config/arm/arm-tables.opt +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ insn-config.h conditions.h output.h dumpfile.h \ diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi index 2f2f8ffa5e2..07e32b38de8 100644 --- a/gcc/config/arm/t-linux-eabi +++ b/gcc/config/arm/t-linux-eabi @@ -18,6 +18,8 @@ # We do not build a Thumb multilib for Linux because the definition of # CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode. +# If you set MULTILIB_OPTIONS to a non-empty value you should also set +# MULTILIB_DEFAULTS in linux-elf.h. 
MULTILIB_OPTIONS = MULTILIB_DIRNAMES = diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 8b184a80c2e..3b5944a014a 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -36,7 +36,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "2") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alu_shift_imm")] ) ;; We use the '0' constraint for operand 1 because reload should @@ -58,7 +58,8 @@ "" [(set_attr "conds" "clob") (set_attr "enabled_for_depr_it" "yes,yes,no") - (set_attr "length" "6,6,10")] + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_sminsi3" @@ -78,7 +79,8 @@ "" [(set_attr "conds" "clob") (set_attr "enabled_for_depr_it" "yes,yes,no") - (set_attr "length" "6,6,10")] + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb32_umaxsi3" @@ -98,7 +100,8 @@ "" [(set_attr "conds" "clob") (set_attr "length" "6,6,10") - (set_attr "enabled_for_depr_it" "yes,yes,no")] + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_uminsi3" @@ -118,7 +121,8 @@ "" [(set_attr "conds" "clob") (set_attr "length" "6,6,10") - (set_attr "enabled_for_depr_it" "yes,yes,no")] + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] ) ;; Thumb-2 does not have rsc, so use a clever trick with shifter operands. @@ -143,7 +147,8 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "clob") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_abssi2" @@ -200,7 +205,8 @@ (set_attr "predicable_short_it" "no") (set_attr "enabled_for_depr_it" "yes,yes,no") (set_attr "ce_count" "2") - (set_attr "length" "8,6,10")] + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_neg_abssi2" @@ -257,7 +263,8 @@ (set_attr "enabled_for_depr_it" "yes,yes,no") (set_attr "predicable_short_it" "no") (set_attr "ce_count" "2") - (set_attr "length" "8,6,10")] + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] ) ;; We have two alternatives here for memory loads (and similarly for stores) @@ -282,7 +289,7 @@ ldr%?\\t%0, %1 str%?\\t%1, %0 str%?\\t%1, %0" - [(set_attr "type" "*,arlo_imm,arlo_imm,arlo_imm,*,load1,load1,store1,store1") + [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") (set_attr "length" "2,4,2,4,4,4,4,4,4") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") @@ -303,7 +310,8 @@ INTVAL (operands[3])); return \"add\\t%2, %|pc\;ldr%?\\t%0, [%2]\"; " - [(set_attr "length" "4,4,6,6")] + [(set_attr "length" "4,4,6,6") + (set_attr "type" "multiple")] ) ;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot @@ -319,12 +327,27 @@ movw%?\\t%0, %L1\\t%@ movhi str%(h%)\\t%1, %0\\t%@ movhi ldr%(h%)\\t%0, %1\\t%@ movhi" - [(set_attr "type" "*,*,store1,load1") + [(set_attr "type" "mov_imm,mov_reg,store1,load1") (set_attr "predicable" "yes") (set_attr "pool_range" "*,*,*,4094") (set_attr "neg_pool_range" "*,*,*,250")] ) +(define_insn "*thumb2_storewb_pairsi" + [(set (match_operand:SI 0 "register_operand" "=&kr") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (set (mem:SI (plus:SI (match_dup 0) (match_dup 2))) + (match_operand:SI 3 "register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + 
(match_operand:SI 5 "const_int_operand" "n"))) + (match_operand:SI 4 "register_operand" "r"))] + "TARGET_THUMB2 + && INTVAL (operands[5]) == INTVAL (operands[2]) + 4" + "strd\\t%3, %4, [%0, %2]!" + [(set_attr "type" "store2")] +) + (define_insn "*thumb2_cmpsi_neg_shiftsi" [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 0 "s_register_operand" "r") @@ -335,7 +358,7 @@ "cmn%?\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alus_shift_imm")] ) (define_insn_and_split "*thumb2_mov_scc" @@ -352,7 +375,8 @@ "" [(set_attr "conds" "use") (set_attr "enabled_for_depr_it" "yes,no") - (set_attr "length" "8,10")] + (set_attr "length" "8,10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_negscc" @@ -370,7 +394,8 @@ operands[3] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "10")] + (set_attr "length" "10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_negscc_strict_it" @@ -398,7 +423,8 @@ } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_notscc" @@ -417,7 +443,8 @@ operands[4] = GEN_INT (~0); } [(set_attr "conds" "use") - (set_attr "length" "10")] + (set_attr "length" "10") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_mov_notscc_strict_it" @@ -439,7 +466,8 @@ VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_movsicc_insn" @@ -499,7 +527,8 @@ } [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") - (set_attr "conds" "use")] + (set_attr "conds" "use") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_movsfcc_soft_insn" @@ -513,7 +542,8 @@ it\\t%D3\;mov%D3\\t%0, %2 it\\t%d3\;mov%d3\\t%0, %1" [(set_attr "length" "6,6") - (set_attr "conds" "use")] + (set_attr "conds" "use") + (set_attr "type" "multiple")] ) (define_insn "*call_reg_thumb2" @@ -542,7 +572,8 @@ (match_operand:SI 0 "register_operand" "l*r"))] "TARGET_THUMB2" "bx\\t%0" - [(set_attr "conds" "clob")] + [(set_attr "conds" "clob") + (set_attr "type" "branch")] ) ;; Don't define thumb2_load_indirect_jump because we can't guarantee label ;; addresses will have the thumb bit set correctly. 
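The *thumb2_storewb_pairsi pattern added in the hunk above combines a constant base-register update with two consecutive word stores and emits a single strd with pre-indexed writeback (strd ra, rb, [rbase, #imm]!), classified as "store2". As a rough, hypothetical illustration (not taken from the patch, and with no guarantee that combine forms this exact pattern), the shape of source code it targets is:

/* Hypothetical sketch: two adjacent word stores through a base pointer
   that is also advanced by a constant offset.  With the pattern above,
   the base update and both stores can be covered by one
   "strd ra, rb, [rbase, #imm]!" instruction.  */
void push_two (int **slot, int a, int b)
{
  int *p = *slot + 2;   /* base register bumped by a constant (8 bytes) */
  p[0] = a;             /* store at the updated base address            */
  p[1] = b;             /* store at updated base + 4                    */
  *slot = p;            /* write the updated base back                  */
}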
@@ -570,6 +601,7 @@ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") + (set_attr "type" "multiple") (set (attr "length") (if_then_else (match_test "arm_restrict_it") (const_int 8) (const_int 10)))] @@ -602,7 +634,8 @@ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "6,10")] + (set_attr "length" "6,10") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_ior_scc_strict_it" @@ -615,7 +648,8 @@ it\\t%d2\;mov%d2\\t%0, #1\;it\\t%d2\;orr%d2\\t%0, %1 mov\\t%0, #1\;orr\\t%0, %1\;it\\t%D2\;mov%D2\\t%0, %1" [(set_attr "conds" "use") - (set_attr "length" "8")] + (set_attr "length" "8") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_cond_move" @@ -664,7 +698,8 @@ return \"\"; " [(set_attr "conds" "use") - (set_attr "length" "6,6,10")] + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_cond_arith" @@ -701,7 +736,8 @@ return \"%i5%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "14")] + (set_attr "length" "14") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_cond_arith_strict_it" @@ -770,7 +806,8 @@ FAIL; } [(set_attr "conds" "clob") - (set_attr "length" "12")] + (set_attr "length" "12") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_cond_sub" @@ -801,7 +838,8 @@ return \"sub%d4\\t%0, %1, #1\"; " [(set_attr "conds" "clob") - (set_attr "length" "10,14")] + (set_attr "length" "10,14") + (set_attr "type" "multiple")] ) (define_insn_and_split "*thumb2_negscc" @@ -869,7 +907,8 @@ FAIL; } [(set_attr "conds" "clob") - (set_attr "length" "14")] + (set_attr "length" "14") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_movcond" @@ -952,7 +991,8 @@ return \"\"; " [(set_attr "conds" "clob") - (set_attr "length" "10,10,14")] + (set_attr "length" "10,10,14") + (set_attr "type" "multiple")] ) ;; Zero and sign extension instructions. 
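Several of the insns retyped above (*thumb2_cond_move, *thumb2_cond_arith, *thumb2_cond_sub, *thumb2_movcond and the casesi patterns) expand to a compare plus an IT block with two or more conditional instructions, which is why the patch tags them with the new "multiple" classification instead of a single-instruction type. A minimal, purely illustrative C sketch of the kind of construct those patterns implement:

/* Illustrative only: a conditional select that Thumb-2 typically lowers
   to a compare followed by an IT block and two conditional moves --
   i.e. more than one machine instruction, hence type "multiple".  */
int select_min (int a, int b)
{
  return (a < b) ? a : b;   /* e.g. cmp; ite lt; movlt; movge */
}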
@@ -1015,7 +1055,8 @@ "TARGET_THUMB2 && !flag_pic" "* return thumb2_output_casesi(operands);" [(set_attr "conds" "clob") - (set_attr "length" "16")] + (set_attr "length" "16") + (set_attr "type" "multiple")] ) (define_insn "thumb2_casesi_internal_pic" @@ -1033,7 +1074,8 @@ "TARGET_THUMB2 && flag_pic" "* return thumb2_output_casesi(operands);" [(set_attr "conds" "clob") - (set_attr "length" "20")] + (set_attr "length" "20") + (set_attr "type" "multiple")] ) (define_insn "*thumb2_return" @@ -1070,7 +1112,8 @@ && GET_CODE(operands[3]) != MINUS" "%I3%!\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb2_shiftsi3_short" @@ -1087,8 +1130,8 @@ (set_attr "shift" "1") (set_attr "length" "2") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "arlo_shift") - (const_string "arlo_shift_reg")))] + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] ) (define_insn "*thumb2_mov<mode>_shortim" @@ -1098,7 +1141,8 @@ "TARGET_THUMB2 && reload_completed" "mov%!\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "mov_imm")] ) (define_insn "*thumb2_addsi_short" @@ -1122,7 +1166,8 @@ return \"add%!\\t%0, %1, %2\"; " [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb2_subsi_short" @@ -1133,7 +1178,8 @@ "TARGET_THUMB2 && reload_completed" "sub%!\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_peephole2 @@ -1185,7 +1231,8 @@ return \"adds\\t%0, %1, %2\"; " [(set_attr "conds" "set") - (set_attr "length" "2,2,4")] + (set_attr "length" "2,2,4") + (set_attr "type" "alu_reg")] ) (define_insn "*thumb2_addsi3_compare0_scratch" @@ -1210,7 +1257,7 @@ " [(set_attr "conds" "set") (set_attr "length" "2,2,4,4") - (set_attr "type" "arlo_imm,*,arlo_imm,*")] + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_reg")] ) (define_insn "*thumb2_mulsi_short" @@ -1269,7 +1316,8 @@ (le (minus (match_dup 1) (pc)) (const_int 128)) (not (match_test "which_alternative"))) (const_int 2) - (const_int 8)))] + (const_int 8))) + (set_attr "type" "branch,multiple")] ) (define_insn "*thumb2_cbnz" @@ -1292,7 +1340,8 @@ (le (minus (match_dup 1) (pc)) (const_int 128)) (not (match_test "which_alternative"))) (const_int 2) - (const_int 8)))] + (const_int 8))) + (set_attr "type" "branch,multiple")] ) (define_insn "*thumb2_one_cmplsi2_short" @@ -1302,7 +1351,8 @@ "TARGET_THUMB2 && reload_completed" "mvn%!\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "mvn_reg")] ) (define_insn "*thumb2_negsi2_short" @@ -1312,7 +1362,8 @@ "TARGET_THUMB2 && reload_completed" "neg%!\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "length" "2")] + (set_attr "length" "2") + (set_attr "type" "alu_reg")] ) (define_insn "*orsi_notsi_si" @@ -1322,7 +1373,8 @@ "TARGET_THUMB2" "orn%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] ) (define_insn "*orsi_not_shiftsi_si" @@ -1336,7 +1388,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "2") - (set_attr "type" "arlo_shift")] + (set_attr "type" "alu_shift_imm")] ) (define_peephole2 diff --git a/gcc/config/arm/types.md 
b/gcc/config/arm/types.md new file mode 100644 index 00000000000..7a96438fd48 --- /dev/null +++ b/gcc/config/arm/types.md @@ -0,0 +1,563 @@ +;; Instruction Classification for ARM for GNU compiler. + +;; Copyright (C) 1991-2013 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +; TYPE attribute is used to classify instructions for use in scheduling. +; +; Instruction classification: +; +; adc_imm add/subtract with carry and with an immediate operand. +; adc_reg add/subtract with carry and no immediate operand. +; adcs_imm as adc_imm, setting condition flags. +; adcs_reg as adc_reg, setting condition flags. +; adr calculate address. +; alu_ext From ARMv8-A: any arithmetic instruction that has a +; sign/zero-extended. +; AArch64 Only. +; source operand +; alu_imm any arithmetic instruction that doesn't have a shifted +; operand and has an immediate operand. This +; excludes MOV, MVN and RSB(S) immediate. +; alu_reg any arithmetic instruction that doesn't have a shifted +; or an immediate operand. This excludes +; MOV and MVN but includes MOVT. This is also the default. +; alu_shift_imm any arithmetic instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; alu_shift_reg as alu_shift_imm, with the shift amount specified in a +; register. +; alus_ext From ARMv8-A: as alu_ext, setting condition flags. +; AArch64 Only. +; alus_imm as alu_imm, setting condition flags. +; alus_reg as alu_reg, setting condition flags. +; alus_shift_imm as alu_shift_imm, setting condition flags. +; alus_shift_reg as alu_shift_reg, setting condition flags. +; bfm bitfield move operation. +; block blockage insn, this blocks all functional units. +; branch branch. +; call subroutine call. +; clz count leading zeros (CLZ). +; csel From ARMv8-A: conditional select. +; extend extend instruction (SXTB, SXTH, UXTB, UXTH). +; f_cvt conversion between float representations. +; f_cvtf2i conversion between float and integral types. +; f_cvti2f conversion between integral and float types. +; f_flag transfer of co-processor flags to the CPSR. +; f_load[d,s] double/single load from memory. Used for VFP unit. +; f_mcr transfer arm to vfp reg. +; f_mcrr transfer two arm regs to vfp reg. +; f_minmax[d,s] double/single floating point minimum/maximum. +; f_mrc transfer vfp to arm reg. +; f_mrrc transfer vfp to two arm regs. +; f_rint[d,s] double/single floating point rount to integral. +; f_sel[d,s] double/single floating byte select. +; f_store[d,s] double/single store to memory. Used for VFP unit. +; fadd[d,s] double/single floating-point scalar addition. +; fcmp[d,s] double/single floating-point compare. +; fconst[d,s] double/single load immediate. +; fcsel From ARMv8-A: Floating-point conditional select. +; fdiv[d,s] double/single precision floating point division. +; ffarith[d,s] double/single floating point abs/neg/cpy. 
+; ffma[d,s] double/single floating point fused multiply-accumulate. +; float floating point arithmetic operation. +; fmac[d,s] double/single floating point multiply-accumulate. +; fmov floating point to floating point register move. +; fmul[d,s] double/single floating point multiply. +; fsqrt[d,s] double/single precision floating point square root. +; load_acq load-acquire. +; load_byte load byte(s) from memory to arm registers. +; load1 load 1 word from memory to arm registers. +; load2 load 2 words from memory to arm registers. +; load3 load 3 words from memory to arm registers. +; load4 load 4 words from memory to arm registers. +; logic_imm any logical instruction that doesn't have a shifted +; operand and has an immediate operand. +; logic_reg any logical instruction that doesn't have a shifted +; operand or an immediate operand. +; logic_shift_imm any logical instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; logic_shift_reg as logic_shift_imm, with the shift amount specified in a +; register. +; logics_imm as logic_imm, setting condition flags. +; logics_reg as logic_reg, setting condition flags. +; logics_shift_imm as logic_shift_imm, setting condition flags. +; logics_shift_reg as logic_shift_reg, setting condition flags. +; mla integer multiply accumulate. +; mlas integer multiply accumulate, flag setting. +; mov_imm simple MOV instruction that moves an immediate to +; register. This includes MOVW, but not MOVT. +; mov_reg simple MOV instruction that moves a register to another +; register. This includes MOVW, but not MOVT. +; mov_shift simple MOV instruction, shifted operand by a constant. +; mov_shift_reg simple MOV instruction, shifted operand by a register. +; mrs system/special/co-processor register move. +; mul integer multiply. +; muls integer multiply, flag setting. +; multiple more than one instruction, candidate for future +; splitting, or better modeling. +; mvn_imm inverting move instruction, immediate. +; mvn_reg inverting move instruction, register. +; mvn_shift inverting move instruction, shifted operand by a constant. +; mvn_shift_reg inverting move instruction, shifted operand by a register. +; no_insn an insn which does not represent an instruction in the +; final output, thus having no impact on scheduling. +; rbit reverse bits. +; rev reverse bytes. +; sdiv signed division. +; shift_imm simple shift operation (LSL, LSR, ASR, ROR) with an +; immediate. +; shift_reg simple shift by a register. +; smlad signed multiply accumulate dual. +; smladx signed multiply accumulate dual reverse. +; smlal signed multiply accumulate long. +; smlald signed multiply accumulate long dual. +; smlals signed multiply accumulate long, flag setting. +; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. +; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. +; smlawy signed multiply accumulate wide, 32x16-bit, +; 32-bit accumulate. +; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. +; smlsd signed multiply subtract dual. +; smlsdx signed multiply subtract dual reverse. +; smlsld signed multiply subtract long dual. +; smmla signed most significant word multiply accumulate. +; smmul signed most significant word multiply. +; smmulr signed most significant word multiply, rounded. +; smuad signed dual multiply add. +; smuadx signed dual multiply add reverse. +; smull signed multiply long. +; smulls signed multiply long, flag setting. +; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. 
+; smulxy signed multiply, 16x16-bit, 32-bit accumulate. +; smusd signed dual multiply subtract. +; smusdx signed dual multiply subtract reverse. +; store_rel store-release. +; store1 store 1 word to memory from arm registers. +; store2 store 2 words to memory from arm registers. +; store3 store 3 words to memory from arm registers. +; store4 store 4 (or more) words to memory from arm registers. +; udiv unsigned division. +; umaal unsigned multiply accumulate accumulate long. +; umlal unsigned multiply accumulate long. +; umlals unsigned multiply accumulate long, flag setting. +; umull unsigned multiply long. +; umulls unsigned multiply long, flag setting. +; untyped insn without type information - default, and error, +; case. +; +; The classification below is for instructions used by the Wireless MMX +; Technology. Each attribute value is used to classify an instruction of the +; same name or family. +; +; wmmx_tandc +; wmmx_tbcst +; wmmx_textrc +; wmmx_textrm +; wmmx_tinsr +; wmmx_tmcr +; wmmx_tmcrr +; wmmx_tmia +; wmmx_tmiaph +; wmmx_tmiaxy +; wmmx_tmrc +; wmmx_tmrrc +; wmmx_tmovmsk +; wmmx_torc +; wmmx_torvsc +; wmmx_wabs +; wmmx_wdiff +; wmmx_wacc +; wmmx_wadd +; wmmx_waddbhus +; wmmx_waddsubhx +; wmmx_waligni +; wmmx_walignr +; wmmx_wand +; wmmx_wandn +; wmmx_wavg2 +; wmmx_wavg4 +; wmmx_wcmpeq +; wmmx_wcmpgt +; wmmx_wmac +; wmmx_wmadd +; wmmx_wmax +; wmmx_wmerge +; wmmx_wmiawxy +; wmmx_wmiaxy +; wmmx_wmin +; wmmx_wmov +; wmmx_wmul +; wmmx_wmulw +; wmmx_wldr +; wmmx_wor +; wmmx_wpack +; wmmx_wqmiaxy +; wmmx_wqmulm +; wmmx_wqmulwm +; wmmx_wror +; wmmx_wsad +; wmmx_wshufh +; wmmx_wsll +; wmmx_wsra +; wmmx_wsrl +; wmmx_wstr +; wmmx_wsub +; wmmx_wsubaddhx +; wmmx_wunpckeh +; wmmx_wunpckel +; wmmx_wunpckih +; wmmx_wunpckil +; wmmx_wxor +; +; The classification below is for NEON instructions. 
+; +; neon_bp_2cycle +; neon_bp_3cycle +; neon_bp_simple +; neon_fp_vadd_ddd_vabs_dd +; neon_fp_vadd_qqq_vabs_qq +; neon_fp_vmla_ddd_scalar +; neon_fp_vmla_ddd +; neon_fp_vmla_qqq_scalar +; neon_fp_vmla_qqq +; neon_fp_vmul_ddd +; neon_fp_vmul_qqd +; neon_fp_vrecps_vrsqrts_ddd +; neon_fp_vrecps_vrsqrts_qqq +; neon_fp_vsum +; neon_int_1 +; neon_int_2 +; neon_int_3 +; neon_int_4 +; neon_int_5 +; neon_ldm_2 +; neon_ldr +; neon_mcr_2_mcrr +; neon_mcr +; neon_mla_ddd_16_scalar_qdd_32_16_long_scalar +; neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long +; neon_mla_ddd_8_16_qdd_16_8_long_32_16_long +; neon_mla_qqq_32_qqd_32_scalar +; neon_mla_qqq_8_16 +; neon_mrc +; neon_mrrc +; neon_mul_ddd_16_scalar_32_16_long_scalar +; neon_mul_ddd_8_16_qdd_16_8_long_32_16_long +; neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar +; neon_mul_qqd_32_scalar +; neon_mul_qqq_8_16_32_ddd_32 +; neon_shift_1 +; neon_shift_2 +; neon_shift_3 +; neon_stm_2 +; neon_str +; neon_vaba_qqq +; neon_vaba +; neon_vld1_1_2_regs +; neon_vld1_3_4_regs +; neon_vld1_vld2_lane +; neon_vld2_2_regs_vld1_vld2_all_lanes +; neon_vld2_4_regs +; neon_vld3_vld4_all_lanes +; neon_vld3_vld4_lane +; neon_vld3_vld4 +; neon_vmov +; neon_vqneg_vqabs +; neon_vqshl_vrshl_vqrshl_qqq +; neon_vshl_ddd +; neon_vsma +; neon_vsra_vrsra +; neon_vst1_1_2_regs_vst2_2_regs +; neon_vst1_3_4_regs +; neon_vst1_vst2_lane +; neon_vst2_4_regs_vst3_vst4 +; neon_vst3_vst4_lane +; neon_vst3_vst4 + +(define_attr "type" + "adc_imm,\ + adc_reg,\ + adcs_imm,\ + adcs_reg,\ + adr,\ + alu_ext,\ + alu_imm,\ + alu_reg,\ + alu_shift_imm,\ + alu_shift_reg,\ + alus_ext,\ + alus_imm,\ + alus_reg,\ + alus_shift_imm,\ + alus_shift_reg,\ + bfm,\ + block,\ + branch,\ + call,\ + clz,\ + no_insn,\ + csel,\ + extend,\ + f_cvt,\ + f_cvtf2i,\ + f_cvti2f,\ + f_flag,\ + f_loadd,\ + f_loads,\ + f_mcr,\ + f_mcrr,\ + f_minmaxd,\ + f_minmaxs,\ + f_mrc,\ + f_mrrc,\ + f_rintd,\ + f_rints,\ + f_seld,\ + f_sels,\ + f_stored,\ + f_stores,\ + faddd,\ + fadds,\ + fcmpd,\ + fcmps,\ + fconstd,\ + fconsts,\ + fcsel,\ + fdivd,\ + fdivs,\ + ffarithd,\ + ffariths,\ + ffmad,\ + ffmas,\ + float,\ + fmacd,\ + fmacs,\ + fmov,\ + fmuld,\ + fmuls,\ + fsqrts,\ + fsqrtd,\ + load_acq,\ + load_byte,\ + load1,\ + load2,\ + load3,\ + load4,\ + logic_imm,\ + logic_reg,\ + logic_shift_imm,\ + logic_shift_reg,\ + logics_imm,\ + logics_reg,\ + logics_shift_imm,\ + logics_shift_reg,\ + mla,\ + mlas,\ + mov_imm,\ + mov_reg,\ + mov_shift,\ + mov_shift_reg,\ + mrs,\ + mul,\ + muls,\ + multiple,\ + mvn_imm,\ + mvn_reg,\ + mvn_shift,\ + mvn_shift_reg,\ + nop,\ + rbit,\ + rev,\ + sdiv,\ + shift_imm,\ + shift_reg,\ + smlad,\ + smladx,\ + smlal,\ + smlald,\ + smlals,\ + smlalxy,\ + smlawx,\ + smlawy,\ + smlaxy,\ + smlsd,\ + smlsdx,\ + smlsld,\ + smmla,\ + smmul,\ + smmulr,\ + smuad,\ + smuadx,\ + smull,\ + smulls,\ + smulwy,\ + smulxy,\ + smusd,\ + smusdx,\ + store_rel,\ + store1,\ + store2,\ + store3,\ + store4,\ + udiv,\ + umaal,\ + umlal,\ + umlals,\ + umull,\ + umulls,\ + untyped,\ + wmmx_tandc,\ + wmmx_tbcst,\ + wmmx_textrc,\ + wmmx_textrm,\ + wmmx_tinsr,\ + wmmx_tmcr,\ + wmmx_tmcrr,\ + wmmx_tmia,\ + wmmx_tmiaph,\ + wmmx_tmiaxy,\ + wmmx_tmrc,\ + wmmx_tmrrc,\ + wmmx_tmovmsk,\ + wmmx_torc,\ + wmmx_torvsc,\ + wmmx_wabs,\ + wmmx_wabsdiff,\ + wmmx_wacc,\ + wmmx_wadd,\ + wmmx_waddbhus,\ + wmmx_waddsubhx,\ + wmmx_waligni,\ + wmmx_walignr,\ + wmmx_wand,\ + wmmx_wandn,\ + wmmx_wavg2,\ + wmmx_wavg4,\ + wmmx_wcmpeq,\ + wmmx_wcmpgt,\ + wmmx_wmac,\ + wmmx_wmadd,\ + wmmx_wmax,\ + wmmx_wmerge,\ + 
wmmx_wmiawxy,\ + wmmx_wmiaxy,\ + wmmx_wmin,\ + wmmx_wmov,\ + wmmx_wmul,\ + wmmx_wmulw,\ + wmmx_wldr,\ + wmmx_wor,\ + wmmx_wpack,\ + wmmx_wqmiaxy,\ + wmmx_wqmulm,\ + wmmx_wqmulwm,\ + wmmx_wror,\ + wmmx_wsad,\ + wmmx_wshufh,\ + wmmx_wsll,\ + wmmx_wsra,\ + wmmx_wsrl,\ + wmmx_wstr,\ + wmmx_wsub,\ + wmmx_wsubaddhx,\ + wmmx_wunpckeh,\ + wmmx_wunpckel,\ + wmmx_wunpckih,\ + wmmx_wunpckil,\ + wmmx_wxor,\ + neon_bp_2cycle,\ + neon_bp_3cycle,\ + neon_bp_simple,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq_scalar,\ + neon_fp_vmla_qqq,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq,\ + neon_fp_vsum,\ + neon_int_1,\ + neon_int_2,\ + neon_int_3,\ + neon_int_4,\ + neon_int_5,\ + neon_ldm_2,\ + neon_ldr,\ + neon_mcr_2_mcrr,\ + neon_mcr,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mla_qqq_8_16,\ + neon_mrc,\ + neon_mrrc,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_shift_1,\ + neon_shift_2,\ + neon_shift_3,\ + neon_stm_2,\ + neon_str,\ + neon_vaba_qqq,\ + neon_vaba,\ + neon_vld1_1_2_regs,\ + neon_vld1_3_4_regs,\ + neon_vld1_vld2_lane,\ + neon_vld2_2_regs_vld1_vld2_all_lanes,\ + neon_vld2_4_regs,\ + neon_vld3_vld4_all_lanes,\ + neon_vld3_vld4_lane,\ + neon_vld3_vld4,\ + neon_vmov,\ + neon_vqneg_vqabs,\ + neon_vqshl_vrshl_vqrshl_qqq,\ + neon_vshl_ddd,\ + neon_vsma,\ + neon_vsra_vrsra,\ + neon_vst1_1_2_regs_vst2_2_regs,\ + neon_vst1_3_4_regs,\ + neon_vst1_vst2_lane,\ + neon_vst2_4_regs_vst3_vst4,\ + neon_vst3_vst4_lane,\ + neon_vst3_vst4" + (const_string "untyped")) + +; Is this an (integer side) multiply with a 32-bit (or smaller) result? +(define_attr "mul32" "no,yes" + (if_then_else + (eq_attr "type" + "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ + smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") + (const_string "yes") + (const_string "no"))) + +; Is this an (integer side) multiply with a 64-bit result? 
+(define_attr "mul64" "no,yes" + (if_then_else + (eq_attr "type" + "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") + (const_string "yes") + (const_string "no"))) diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index ef8777a900b..9318e49d9ea 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -53,8 +53,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") - (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") + (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] ) @@ -101,9 +100,8 @@ " [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") - (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") - (set_attr "neon_type" "*,*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] ) @@ -146,8 +144,7 @@ gcc_unreachable (); } " - [(set_attr "type" "*,*,*,*,load2,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") - (set_attr "neon_type" "*,*,*,*,*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8) (eq_attr "alternative" "2") (const_int 12) (eq_attr "alternative" "3") (const_int 16) @@ -195,8 +192,7 @@ gcc_unreachable (); } " - [(set_attr "type" "*,*,*,*,load2,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") - (set_attr "neon_type" "*,*,*,*,*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8) (eq_attr "alternative" "2") (const_int 12) (eq_attr "alternative" "3") (const_int 16) @@ -264,8 +260,8 @@ } " [(set_attr "conds" "unconditional") - (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*") - (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*") + (set_attr "type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,\ + load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") (set_attr "length" "4,4,4,4,4,4,4,4,8")] ) @@ -315,7 +311,7 @@ } " [(set_attr "conds" "unconditional") - (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "type" "load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") (set_attr "length" "4,4,4,4,4,4,8")] ) @@ -355,8 +351,7 @@ " [(set_attr "predicable" "yes") (set_attr "type" - "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,mov_reg") - (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") + "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] ) @@ -393,8 +388,7 @@ [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "type" - "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,mov_reg") - 
(set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") + "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") (set_attr "pool_range" "*,*,*,1018,*,4090,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) @@ -434,9 +428,8 @@ } } " - [(set_attr "type" - "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") - (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\ + load2,store2,ffarithd,multiple") (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) (eq_attr "alternative" "7") (if_then_else @@ -480,9 +473,8 @@ } } " - [(set_attr "type" - "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") - (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\ + f_stored,load2,store2,ffarithd,multiple") (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) (eq_attr "alternative" "7") (if_then_else @@ -517,8 +509,7 @@ fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] + (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] ) (define_insn "*thumb2_movsfcc_vfp" @@ -541,8 +532,7 @@ ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] + (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] ) (define_insn "*movdfcc_vfp" @@ -565,8 +555,7 @@ fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")] ) (define_insn "*thumb2_movdfcc_vfp" @@ -589,8 +578,7 @@ ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") - (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")] ) @@ -1003,7 +991,7 @@ "ftosizs%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) (define_insn "*truncsidf2_vfp" @@ -1013,7 +1001,7 @@ "ftosizd%?\\t%0, %P1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) @@ -1024,7 +1012,7 @@ "ftouizs%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) (define_insn "fixuns_truncdfsi2" @@ -1034,7 +1022,7 @@ "ftouizd%?\\t%0, %P1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr 
"type" "f_cvt")] + (set_attr "type" "f_cvtf2i")] ) @@ -1045,7 +1033,7 @@ "fsitos%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) (define_insn "*floatsidf2_vfp" @@ -1055,7 +1043,7 @@ "fsitod%?\\t%P0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) @@ -1066,7 +1054,7 @@ "fuitos%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) (define_insn "floatunssidf2" @@ -1076,7 +1064,7 @@ "fuitod%?\\t%P0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) @@ -1089,7 +1077,7 @@ "fsqrts%?\\t%0, %1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivs")] + (set_attr "type" "fsqrts")] ) (define_insn "*sqrtdf2_vfp" @@ -1099,7 +1087,7 @@ "fsqrtd%?\\t%P0, %P1" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivd")] + (set_attr "type" "fsqrtd")] ) @@ -1241,7 +1229,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" "vcvt.f32.<FCVTI32typename>\\t%0, %1, %v2" [(set_attr "predicable" "no") - (set_attr "type" "f_cvt")] + (set_attr "type" "f_cvti2f")] ) ;; Not the ideal way of implementing this. Ideally we would be able to split @@ -1258,7 +1246,7 @@ vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2 vmov.f64\\t%P0, %1, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2" [(set_attr "predicable" "no") - (set_attr "type" "f_cvt") + (set_attr "type" "f_cvti2f") (set_attr "length" "8")] ) diff --git a/gcc/config/arm/vfp11.md b/gcc/config/arm/vfp11.md index b027fe6c3cd..4cfa69efc24 100644 --- a/gcc/config/arm/vfp11.md +++ b/gcc/config/arm/vfp11.md @@ -51,12 +51,13 @@ (define_insn_reservation "vfp_ffarith" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) "fmac") (define_insn_reservation "vfp_farith" 8 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs,ffmas")) + (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,f_cvtf2i,f_cvti2f,\ + fmuls,fmacs,ffmas")) "fmac") (define_insn_reservation "vfp_fmul" 9 @@ -66,23 +67,23 @@ (define_insn_reservation "vfp_fdivs" 19 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "ds*15") (define_insn_reservation "vfp_fdivd" 33 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "fmac+ds*29") ;; Moves to/from arm regs also use the load/store pipeline. (define_insn_reservation "vfp_fload" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "f_loads,f_loadd,r_2_f")) + (eq_attr "type" "f_loads,f_loadd,f_mcr,f_mcrr")) "vfp_ls") (define_insn_reservation "vfp_fstore" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "f_stores,f_stored,f_2_r")) + (eq_attr "type" "f_stores,f_stored,f_mrc,f_mrrc")) "vfp_ls") (define_insn_reservation "vfp_to_cpsr" 4 diff --git a/gcc/config/avr/avr-stdint.h b/gcc/config/avr/avr-stdint.h index 8e7278f389a..4137b0689a5 100644 --- a/gcc/config/avr/avr-stdint.h +++ b/gcc/config/avr/avr-stdint.h @@ -34,11 +34,11 @@ along with GCC; see the file COPYING3. 
If not see #define SIG_ATOMIC_TYPE "char" #define INT8_TYPE "signed char" -#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "short int" : "long int") +#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "int" : "long int") #define INT32_TYPE (INT_TYPE_SIZE == 16 ? "long int" : "long long int") #define INT64_TYPE (INT_TYPE_SIZE == 16 ? "long long int" : 0) #define UINT8_TYPE "unsigned char" -#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "short unsigned int" : "long unsigned int") +#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : "long unsigned int") #define UINT32_TYPE (INT_TYPE_SIZE == 16 ? "long unsigned int" : "long long unsigned int") #define UINT64_TYPE (INT_TYPE_SIZE == 16 ? "long long unsigned int" : 0) diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 14a3eee7c72..f6d88856bec 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -7030,7 +7030,9 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) RTX_CODE shift = UNKNOWN; bool sign_in_carry = false; bool msb_in_carry = false; + bool lsb_in_tmp_reg = false; bool lsb_in_carry = false; + bool frac_rounded = false; const char *code_ashift = "lsl %0"; @@ -7038,6 +7040,7 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) /* Shorthand used below. */ \ ((sign_bytes \ && IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb)) \ + || (offset && IN_RANGE (RR, dest.regno, dest.regno_msb)) \ || (reg_unused_after (insn, all_regs_rtx[RR]) \ && !IN_RANGE (RR, dest.regno, dest.regno_msb))) @@ -7112,13 +7115,119 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) else gcc_unreachable(); + /* If we need to round the fraction part, we might need to save/round it + before clobbering any of it in Step 1. Also, we might to want to do + the rounding now to make use of LD_REGS. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + bool overlap + = (src.regno <= + (offset ? dest.regno_msb - sign_bytes : dest.regno + zero_bytes - 1) + && dest.regno - offset -1 >= dest.regno); + unsigned s0 = dest.regno - offset -1; + bool use_src = true; + unsigned sn; + unsigned copied_msb = src.regno_msb; + bool have_carry = false; + + if (src.ibyte > dest.ibyte) + copied_msb -= src.ibyte - dest.ibyte; + + for (sn = s0; sn <= copied_msb; sn++) + if (!IN_RANGE (sn, dest.regno, dest.regno_msb) + && !reg_unused_after (insn, all_regs_rtx[sn])) + use_src = false; + if (use_src && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0)) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + sn = src.regno; + if (sn < s0) + { + if (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], sn)) + avr_asm_len ("cpi %0,1", &all_regs_rtx[sn], plen, 1); + else + avr_asm_len ("sec" CR_TAB "cpc %0,__zero_reg__", + &all_regs_rtx[sn], plen, 2); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + avr_asm_len (have_carry ? 
"sbci %0,128" : "subi %0,129", + &all_regs_rtx[s0], plen, 1); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("sbci %0,255", &all_regs_rtx[sn], plen, 1); + avr_asm_len ("\n0:", NULL, plen, 0); + frac_rounded = true; + } + else if (use_src && overlap) + { + avr_asm_len ("clr __tmp_reg__" CR_TAB + "sbrc %1,0" CR_TAB "dec __tmp_reg__", xop, plen, 1); + sn = src.regno; + if (sn < s0) + { + avr_asm_len ("add %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("adc %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __tmp_reg__,7" CR_TAB + "adc %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("lsr __tmp_reg" CR_TAB "add %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 2); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + frac_rounded = true; + } + else if (overlap) + { + bool use_src + = (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0) + && (IN_RANGE (s0, dest.regno, dest.regno_msb) + || reg_unused_after (insn, all_regs_rtx[s0]))); + xop[2] = all_regs_rtx[s0]; + unsigned sn = src.regno; + if (!use_src || sn == s0) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + /* We need to consider to-be-discarded bits + if the value is negative. */ + if (sn < s0) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + /* Test to-be-discarded bytes for any nozero bits. + ??? Could use OR or SBIW to test two registers at once. */ + if (sn < s0) + avr_asm_len ("cp %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + /* Set bit 0 in __tmp_reg__ if any of the lower bits was set. */ + if (use_src) + avr_asm_len ("breq 0f" CR_TAB + "ori %2,1" "\n0:\t" "mov __tmp_reg__,%2", + xop, plen, 3); + else + avr_asm_len ("breq 0f" CR_TAB + "set" CR_TAB "bld __tmp_reg__,0\n0:", + xop, plen, 3); + } + lsb_in_tmp_reg = true; + } + } + /* Step 1: Clear bytes at the low end and copy payload bits from source ====== to destination. */ int step = offset < 0 ? 1 : -1; unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb; - // We leared at least that number of registers. + // We cleared at least that number of registers. int clr_n = 0; for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step) @@ -7208,6 +7317,7 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) unsigned src_lsb = dest.regno - offset -1; if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry + && !lsb_in_tmp_reg && (d0 == src_lsb || d0 + stepw == src_lsb)) { /* We are going to override the new LSB; store it into carry. */ @@ -7229,7 +7339,91 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) { unsigned s0 = dest.regno - offset -1; - if (MAY_CLOBBER (s0)) + /* n1169 4.1.4 says: + "Conversions from a fixed-point to an integer type round toward zero." + Hence, converting a fract type to integer only gives a non-zero result + for -1. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_FRACT_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + gcc_assert (s0 == src.regno_msb); + /* Check if the input is -1. We do that by checking if negating + the input causes an integer overflow. 
*/ + unsigned sn = src.regno; + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + while (sn <= s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + + /* Overflow goes with set carry. Clear carry otherwise. */ + avr_asm_len ("brvs 0f" CR_TAB "clc\n0:", NULL, plen, 2); + } + /* Likewise, when converting from accumulator types to integer, we + need to round up negative values. */ + else if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC + && !frac_rounded) + { + bool have_carry = false; + + xop[2] = all_regs_rtx[s0]; + if (!lsb_in_tmp_reg && !MAY_CLOBBER (s0)) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + if (!lsb_in_tmp_reg) + { + unsigned sn = src.regno; + if (sn < s0) + { + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn], + plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn], plen, 1); + lsb_in_tmp_reg = !MAY_CLOBBER (s0); + } + /* Add in C and the rounding value 127. */ + /* If the destination msb is a sign byte, and in LD_REGS, + grab it as a temporary. */ + if (sign_bytes + && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], + dest.regno_msb)) + { + xop[3] = all_regs_rtx[dest.regno_msb]; + avr_asm_len ("ldi %3,127", xop, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg ? "adc __tmp_reg__,%3" + : have_carry ? "adc %2,%3" + : lsb_in_tmp_reg ? "add __tmp_reg__,%3" + : "add %2,%3"), + xop, plen, 1); + } + else + { + /* Fall back to use __zero_reg__ as a temporary. */ + avr_asm_len ("dec __zero_reg__", NULL, plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __zero_reg__,7", NULL, plen, 2); + else + avr_asm_len ("lsr __zero_reg__", NULL, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg + ? "adc __tmp_reg__,__zero_reg__" + : have_carry ? "adc %2,__zero_reg__" + : lsb_in_tmp_reg ? "add __tmp_reg__,__zero_reg__" + : "add %2,__zero_reg__"), + xop, plen, 1); + avr_asm_len ("eor __zero_reg__,__zero_reg__", NULL, plen, 1); + } + for (d0 = dest.regno + zero_bytes; + d0 <= dest.regno_msb - sign_bytes; d0++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[d0], plen, 1); + avr_asm_len (lsb_in_tmp_reg + ? "\n0:\t" "lsl __tmp_reg__" : "\n0:\t" "lsl %2", + xop, plen, 1); + } + else if (MAY_CLOBBER (s0)) avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1); else avr_asm_len ("mov __tmp_reg__,%0" CR_TAB diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt index 4b990775b7f..9b0f782d385 100644 --- a/gcc/config/avr/avr.opt +++ b/gcc/config/avr/avr.opt @@ -78,3 +78,7 @@ The device has no SPH special function register. This option will be overridden Waddr-space-convert Warning C Report Var(avr_warn_addr_space_convert) Init(0) Warn if the address space of an address is changed. 
+ +mfract-convert-truncate +Target Report Mask(FRACT_CONV_TRUNC) +Allow to use truncation instead of rounding towards 0 for fractional int types diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c index 7fab975a673..18457f8f7b6 100644 --- a/gcc/config/bfin/bfin.c +++ b/gcc/config/bfin/bfin.c @@ -46,6 +46,7 @@ #include "cgraph.h" #include "langhooks.h" #include "bfin-protos.h" +#include "tm_p.h" #include "tm-preds.h" #include "tm-constrs.h" #include "gt-bfin.h" diff --git a/gcc/config/bfin/uclinux.h b/gcc/config/bfin/uclinux.h index ca0f4ee8a35..63cba99cec6 100644 --- a/gcc/config/bfin/uclinux.h +++ b/gcc/config/bfin/uclinux.h @@ -44,3 +44,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_SUPPORTS_SYNC_CALLS 1 #define SUBTARGET_FDPIC_NOT_SUPPORTED + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/c6x/uclinux-elf.h b/gcc/config/c6x/uclinux-elf.h index 5d61f4dc4ec..fa0937ed268 100644 --- a/gcc/config/c6x/uclinux-elf.h +++ b/gcc/config/c6x/uclinux-elf.h @@ -62,3 +62,5 @@ : "0" (_beg), "b" (_end), "b" (_scno)); \ } +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index 70b7fb00959..36d16b9e57a 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -123,3 +123,4 @@ extern bool darwin_kextabi_p (void); extern void darwin_override_options (void); extern void darwin_patch_builtins (void); extern void darwin_rename_builtins (void); +extern bool darwin_libc_has_function (enum function_class fn_class); diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index e07fa4c8324..6c5d9c00623 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -3357,6 +3357,19 @@ darwin_rename_builtins (void) } } +bool +darwin_libc_has_function (enum function_class fn_class) +{ + if (fn_class == function_sincos) + return false; + if (fn_class == function_c99_math_complex + || fn_class == function_c99_misc) + return (TARGET_64BIT + || strverscmp (darwin_macosx_version_min, "10.3") >= 0); + + return true; +} + static hashval_t cfstring_hash (const void *ptr) { diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 82a42c8598b..596c9ef11f0 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -178,10 +178,11 @@ extern GTY(()) int darwin_ms_struct; %{L*} %(link_libgcc) %o %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} \ %{fopenmp|ftree-parallelize-loops=*: \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libgomp.a%s; : -lgomp } } \ - %{fsanitize=address: -lasan } \ %{fgnu-tm: \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libitm.a%s; : -litm } } \ %{!nostdlib:%{!nodefaultlibs:\ + %{%:sanitize(address): -lasan } \ + %{%:sanitize(undefined): -lubsan } \ %(link_ssp) %(link_gcc_c_sequence)\ }}\ %{!nostdlib:%{!nostartfiles:%E}} %{T*} %{F*} }}}}}}}" @@ -874,10 +875,6 @@ void add_framework_path (char *); #define TARGET_POSIX_IO -/* All new versions of Darwin have C99 functions. */ - -#define TARGET_C99_FUNCTIONS 1 - #define WINT_TYPE "int" /* Every program on darwin links against libSystem which contains the pthread diff --git a/gcc/config/elfos.h b/gcc/config/elfos.h index 438302345cf..9606fe0f85c 100644 --- a/gcc/config/elfos.h +++ b/gcc/config/elfos.h @@ -433,3 +433,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ default_elf_asm_output_external (FILE, DECL, NAME) #endif + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/epiphany/epiphany.c b/gcc/config/epiphany/epiphany.c index 1dcdc4b3808..fd4c01c49a4 100644 --- a/gcc/config/epiphany/epiphany.c +++ b/gcc/config/epiphany/epiphany.c @@ -45,6 +45,8 @@ along with GCC; see the file COPYING3. If not see #include "ggc.h" #include "tm-constrs.h" #include "tree-pass.h" /* for current_pass */ +#include "context.h" +#include "pass_manager.h" /* Which cpu we're compiling for. */ int epiphany_cpu_type; @@ -59,6 +61,9 @@ char epiphany_punct_chars[256]; /* The rounding mode that we generally use for floating point. */ int epiphany_normal_fp_rounding; +/* The pass instance, for use in epiphany_optimize_mode_switching. */ +static opt_pass *pass_mode_switch_use; + static void epiphany_init_reg_tables (void); static int get_epiphany_condition_code (rtx); static tree epiphany_handle_interrupt_attribute (tree *, tree, tree, int, bool *); @@ -165,20 +170,26 @@ epiphany_init (void) pass because of the side offect of epiphany_mode_needed on MACHINE_FUNCTION(cfun)->unknown_mode_uses. But it must run before pass_resolve_sw_modes. */ - static struct register_pass_info insert_use_info - = { &pass_mode_switch_use.pass, "mode_sw", + pass_mode_switch_use = make_pass_mode_switch_use (g); + struct register_pass_info insert_use_info + = { pass_mode_switch_use, "mode_sw", 1, PASS_POS_INSERT_AFTER }; - static struct register_pass_info mode_sw2_info - = { &pass_mode_switching.pass, "mode_sw", + opt_pass *mode_sw2 + = g->get_passes()->get_pass_mode_switching ()->clone (); + struct register_pass_info mode_sw2_info + = { mode_sw2, "mode_sw", 1, PASS_POS_INSERT_AFTER }; - static struct register_pass_info mode_sw3_info - = { &pass_resolve_sw_modes.pass, "mode_sw", + opt_pass *mode_sw3 = make_pass_resolve_sw_modes (g); + struct register_pass_info mode_sw3_info + = { mode_sw3, "mode_sw", 1, PASS_POS_INSERT_AFTER }; - static struct register_pass_info mode_sw4_info - = { &pass_split_all_insns.pass, "mode_sw", + opt_pass *mode_sw4 + = g->get_passes()->get_pass_split_all_insns ()->clone (); + struct register_pass_info mode_sw4_info + = { mode_sw4, "mode_sw", 1, PASS_POS_INSERT_AFTER }; static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; @@ -205,8 +216,10 @@ epiphany_init (void) (see http://gcc.gnu.org/ml/gcc-patches/2011-10/msg02819.html,) we need a second peephole2 pass to get reasonable code. 
*/ { - static struct register_pass_info peep2_2_info - = { &pass_peephole2.pass, "peephole2", + opt_pass *extra_peephole2 + = g->get_passes ()->get_pass_peephole2 ()->clone (); + struct register_pass_info peep2_2_info + = { extra_peephole2, "peephole2", 1, PASS_POS_INSERT_AFTER }; @@ -2256,7 +2269,7 @@ epiphany_optimize_mode_switching (int entity) return (MACHINE_FUNCTION (cfun)->sw_entities_processed & (1 << EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN)) != 0; case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: - return optimize == 0 || current_pass == &pass_mode_switch_use.pass; + return optimize == 0 || current_pass == pass_mode_switch_use; } gcc_unreachable (); } diff --git a/gcc/config/epiphany/epiphany.h b/gcc/config/epiphany/epiphany.h index bd84b5c793f..f16ab85dde9 100644 --- a/gcc/config/epiphany/epiphany.h +++ b/gcc/config/epiphany/epiphany.h @@ -929,8 +929,8 @@ enum }; extern int epiphany_normal_fp_rounding; -extern struct rtl_opt_pass pass_mode_switch_use; -extern struct rtl_opt_pass pass_resolve_sw_modes; +extern rtl_opt_pass *make_pass_mode_switch_use (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_resolve_sw_modes (gcc::context *ctxt); /* This will need to be adjusted when FP_CONTRACT_ON is properly implemented. */ diff --git a/gcc/config/epiphany/epiphany.md b/gcc/config/epiphany/epiphany.md index 1e2d2ab02ed..e8756ad8e23 100644 --- a/gcc/config/epiphany/epiphany.md +++ b/gcc/config/epiphany/epiphany.md @@ -587,7 +587,7 @@ ; After mode-switching, floating point operations, fp_sfuncs and calls ; must exhibit the use of the control register, lest the setting of the ; control register could be deleted or moved. OTOH a use of a hard register -; greatly coundounds optimizers like the rtl loop optimizers or combine. +; greatly counfounds optimizers like the rtl loop optimizers or combine. 
; Therefore, we put an extra pass immediately after the mode switching pass ; that inserts the USEs of the control registers, and sets a flag in struct ; machine_function that float_operation can henceforth only match with that @@ -1058,6 +1058,28 @@ (clobber (reg:CC CC_REGNUM))])] ) +(define_peephole2 + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "") + (mult:SI + (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "prev_active_insn (peep2_next_insn (0)) + && get_attr_sched_use_fpu (prev_active_insn (peep2_next_insn (0))) + && peep2_regno_dead_p (1, CC_REGNUM) + && get_attr_sched_use_fpu (next_active_insn (peep2_next_insn (0))) + && find_reg_note (insn, REG_EQUAL, NULL_RTX) != NULL_RTX + && GET_CODE (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0)) == MULT + && CONST_INT_P (XEXP (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0), + 1))" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC CC_REGNUM))])] +{ + operands[4] + = XEXP (XEXP (find_reg_note (curr_insn, REG_EQUAL, NULL_RTX), 0), 1); +}) + (define_expand "mulsi3" [(parallel [(set (match_operand:SI 0 "gpr_operand" "") @@ -2530,6 +2552,106 @@ [(set_attr "length" "8") (set_attr "type" "v2fp")]) +(define_expand "ashlv2si3" + [(parallel + [(set (match_operand:V2SI 0 "gpr_operand" "") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "") + (match_operand:SI 2 "general_operand"))) + (use (match_dup 3)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" +{ + if (const_int_operand (operands[2], VOIDmode)) + operands[3] + = copy_to_mode_reg (SImode, GEN_INT (1 << INTVAL (operands[2]))); + else + { + int o, i; + rtx xop[2], last_out = pc_rtx; + + for (o = 0; o <= UNITS_PER_WORD; o += UNITS_PER_WORD) + { + for (i = 0; i < 2; i++) + { + xop[i] + = (i == 2 ? operands[2] + : simplify_gen_subreg (SImode, operands[i], V2SImode, o)); + gcc_assert (!reg_overlap_mentioned_p (last_out, xop[i]) + /* ??? reg_overlap_mentioned_p doesn't understand + about multi-word SUBREGs. 
*/ + || (GET_CODE (last_out) == SUBREG + && GET_CODE (xop[i]) == SUBREG + && SUBREG_REG (last_out) == SUBREG_REG (xop[i]) + && ((SUBREG_BYTE (last_out) & -UNITS_PER_WORD) + != (SUBREG_BYTE (xop[i]) & -UNITS_PER_WORD)))); + } + emit_insn (gen_ashlsi3 (xop[0], xop[1], operands[2])); + last_out = xop[0]; + } + DONE; + } +}) + +(define_insn_and_split "*ashlv2si3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:V2SI 0 "gpr_operand" "=&r,*1*2") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "r,r") + (match_operand 2 "const_int_operand" "n,n"))) + (use (match_operand:SI 4 "gpr_operand" "r,r")) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed" + [(parallel + [(set (match_dup 5) (mult:SI (match_dup 6) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)]) + (parallel + [(set (match_dup 7) (mult:SI (match_dup 8) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)])] +{ + operands[5] = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + operands[6] = simplify_gen_subreg (SImode, operands[1], V2SImode, 0); + operands[7] = simplify_gen_subreg (SImode, operands[0], + V2SImode, UNITS_PER_WORD); + operands[8] = simplify_gen_subreg (SImode, operands[1], + V2SImode, UNITS_PER_WORD); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[4])); + operands[9] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); + rtx insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[5], + gen_rtx_MULT (SImode, operands[6], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[6], operands[2])); + insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[7], + gen_rtx_MULT (SImode, operands[8], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[7], operands[2])); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "fp_int")]) + (define_expand "mul<mode>3" [(parallel [(set (match_operand:DWV2MODE 0 "gpr_operand" "") diff --git a/gcc/config/epiphany/mode-switch-use.c b/gcc/config/epiphany/mode-switch-use.c index 66529636801..8e278583215 100644 --- a/gcc/config/epiphany/mode-switch-use.c +++ b/gcc/config/epiphany/mode-switch-use.c @@ -71,22 +71,39 @@ insert_uses (void) return 0; } -struct rtl_opt_pass pass_mode_switch_use = +namespace { + +const pass_data pass_data_mode_switch_use = { - { - RTL_PASS, - "mode_switch_use", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - insert_uses, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "mode_switch_use", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; + +class 
pass_mode_switch_use : public rtl_opt_pass +{ +public: + pass_mode_switch_use(gcc::context *ctxt) + : rtl_opt_pass(pass_data_mode_switch_use, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return insert_uses (); } + +}; // class pass_mode_switch_use + +} // anon namespace + +rtl_opt_pass * +make_pass_mode_switch_use (gcc::context *ctxt) +{ + return new pass_mode_switch_use (ctxt); +} diff --git a/gcc/config/epiphany/predicates.md b/gcc/config/epiphany/predicates.md index af60d7c73f7..b77867cc851 100644 --- a/gcc/config/epiphany/predicates.md +++ b/gcc/config/epiphany/predicates.md @@ -292,7 +292,11 @@ bool inserted = MACHINE_FUNCTION (cfun)->control_use_inserted; int i; - if (count == 2) + if (count == 2 + /* Vector ashift has an extra use for the constant factor required to + implement the shift as multiply. */ + || (count == 3 && GET_CODE (XVECEXP (op, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (op, 0, 0), 1)) == ASHIFT)) return !inserted; /* combine / recog will pass any old garbage here before checking the @@ -302,7 +306,7 @@ i = 1; if (count > 4) - for (i = 4; i < count; i++) + for (i = 2; i < count; i++) { rtx x = XVECEXP (op, 0, i); diff --git a/gcc/config/epiphany/resolve-sw-modes.c b/gcc/config/epiphany/resolve-sw-modes.c index 729a0ffc9b7..b43b4d953cd 100644 --- a/gcc/config/epiphany/resolve-sw-modes.c +++ b/gcc/config/epiphany/resolve-sw-modes.c @@ -161,23 +161,40 @@ resolve_sw_modes (void) return 0; } -struct rtl_opt_pass pass_resolve_sw_modes = +namespace { + +const pass_data pass_data_resolve_sw_modes = { - { - RTL_PASS, - "resolve_sw_modes", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_resolve_sw_modes, /* gate */ - resolve_sw_modes, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_MODE_SWITCH, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing | - 0 /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "resolve_sw_modes", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MODE_SWITCH, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ }; + +class pass_resolve_sw_modes : public rtl_opt_pass +{ +public: + pass_resolve_sw_modes(gcc::context *ctxt) + : rtl_opt_pass(pass_data_resolve_sw_modes, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_resolve_sw_modes (); } + unsigned int execute () { return resolve_sw_modes (); } + +}; // class pass_resolve_sw_modes + +} // anon namespace + +rtl_opt_pass * +make_pass_resolve_sw_modes (gcc::context *ctxt) +{ + return new pass_resolve_sw_modes (ctxt); +} diff --git a/gcc/config/freebsd.h b/gcc/config/freebsd.h index 87c0acf1d89..da66253e660 100644 --- a/gcc/config/freebsd.h +++ b/gcc/config/freebsd.h @@ -52,6 +52,9 @@ along with GCC; see the file COPYING3. If not see #define LINK_SSP_SPEC "%{fstack-protector|fstack-protector-all:-lssp_nonshared}" #endif +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* Use --as-needed -lgcc_s for eh support. 
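The TARGET_LIBC_HAS_FUNCTION definitions being added in these headers replace the old TARGET_C99_FUNCTIONS / TARGET_HAS_SINCOS macros with a hook that is queried per function class. A hedged sketch of a custom implementation for a hypothetical target "foo" whose libc provides C99 math but no sincos (the target name and function are invented; the function_class enumerators are assumed to be the ones this hook family takes):

  static bool
  foo_libc_has_function (enum function_class fn_class)
  {
    switch (fn_class)
      {
      case function_c94:
      case function_c99_misc:
      case function_c99_math_complex:
        return true;    /* assume these library entry points exist */
      case function_sincos:
        return false;   /* ... but there is no sincos() */
      default:
        return false;
      }
  }

  #undef TARGET_LIBC_HAS_FUNCTION
  #define TARGET_LIBC_HAS_FUNCTION foo_libc_has_function
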
*/ #ifdef HAVE_LD_AS_NEEDED #define USE_LD_AS_NEEDED 1 diff --git a/gcc/config/gnu-user.h b/gcc/config/gnu-user.h index bcdf0e6cc5a..6f6915842b9 100644 --- a/gcc/config/gnu-user.h +++ b/gcc/config/gnu-user.h @@ -39,15 +39,21 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see the GNU userspace magical crtbegin.o file (see crtstuff.c) which provides part of the support for getting C++ file-scope static object constructed before entering `main'. */ - + #if defined HAVE_LD_PIE #define GNU_USER_TARGET_STARTFILE_SPEC \ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \ - crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s}" #else #define GNU_USER_TARGET_STARTFILE_SPEC \ "%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \ - crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s}" #endif #undef STARTFILE_SPEC #define STARTFILE_SPEC GNU_USER_TARGET_STARTFILE_SPEC @@ -59,7 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see GNU userspace "finalizer" file, `crtn.o'. */ #define GNU_USER_TARGET_ENDFILE_SPEC \ - "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + "%{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end_preinit.o%s; \ + fvtable-verify=std:vtv_end.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" #undef ENDFILE_SPEC #define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC @@ -73,10 +82,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #undef CPLUSPLUS_CPP_SPEC #define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" -#define GNU_USER_TARGET_LIB_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ +#define GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC \ + "%{shared:-lc} \ %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}}" + +#define GNU_USER_TARGET_LIB_SPEC \ + "%{pthread:-lpthread} " \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC + #undef LIB_SPEC #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC @@ -95,8 +108,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_POSIX_IO -#define TARGET_C99_FUNCTIONS 1 -#define TARGET_HAS_SINCOS 1 +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function /* Link -lasan early on the command line. For -static-libasan, don't link it for -shared link, the executable should be compiled with -static-libasan diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 6cb53b8aafb..28e626ff3be 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -19,7 +19,7 @@ ;;; Unused letters: ;;; B H T -;;; h jk v +;;; h jk ;; Integer register constraints. ;; It is not necessary to define 'r' here. @@ -101,11 +101,11 @@ "First SSE register (@code{%xmm0}).") (define_register_constraint "Yi" - "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? SSE_REGS : NO_REGS" + "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? ALL_SSE_REGS : NO_REGS" "@internal Any SSE register, when SSE2 and inter-unit moves to vector registers are enabled.") (define_register_constraint "Yj" - "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? 
SSE_REGS : NO_REGS" + "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? ALL_SSE_REGS : NO_REGS" "@internal Any SSE register, when SSE2 and inter-unit moves from vector registers are enabled.") (define_register_constraint "Ym" @@ -138,6 +138,9 @@ "(ix86_fpmath & FPMATH_387) ? FLOAT_REGS : NO_REGS" "@internal Any x87 register when 80387 FP arithmetic is enabled.") +(define_register_constraint "v" "TARGET_SSE ? ALL_SSE_REGS : NO_REGS" + "Any EVEX encodable SSE register (@code{%xmm0-%xmm31}).") + (define_constraint "z" "@internal Constant call address operand." (match_operand 0 "constant_call_address_operand")) diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index c1e1eba12f1..aa91e1ab8d8 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -71,8 +71,12 @@ #define bit_AVX2 (1 << 5) #define bit_BMI2 (1 << 8) #define bit_RTM (1 << 11) +#define bit_AVX512F (1 << 16) #define bit_RDSEED (1 << 18) #define bit_ADX (1 << 19) +#define bit_AVX512PF (1 << 26) +#define bit_AVX512ER (1 << 27) +#define bit_AVX512CD (1 << 28) /* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */ #define bit_XSAVEOPT (1 << 0) diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h index 27187641aad..9cb66d646be 100644 --- a/gcc/config/i386/cygming.h +++ b/gcc/config/i386/cygming.h @@ -171,6 +171,9 @@ along with GCC; see the file COPYING3. If not see #undef MATH_LIBRARY #define MATH_LIBRARY "" +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + #define SIZE_TYPE (TARGET_64BIT ? "long long unsigned int" : "unsigned int") #define PTRDIFF_TYPE (TARGET_64BIT ? "long long int" : "int") diff --git a/gcc/config/i386/djgpp.h b/gcc/config/i386/djgpp.h index 05f9dfda71d..cc420d0a6d6 100644 --- a/gcc/config/i386/djgpp.h +++ b/gcc/config/i386/djgpp.h @@ -117,6 +117,17 @@ along with GCC; see the file COPYING3. If not see #define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) +/* Write the extra assembler code needed to declare a function properly. */ + +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \ + } \ + while (0) +#endif + /* This is how to tell assembler that a symbol is weak */ #undef ASM_WEAKEN_LABEL #define ASM_WEAKEN_LABEL(FILE,NAME) \ @@ -127,6 +138,9 @@ along with GCC; see the file COPYING3. If not see in libgcc, nor call one in main(). */ #define HAS_INIT_SECTION +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* Definitions for types and sizes. Wide characters are 16-bits long so Win32 compiler add-ons will be wide character compatible. 
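The new bit_AVX512F, bit_AVX512ER, bit_AVX512PF and bit_AVX512CD definitions added to cpuid.h above sit in the %ebx word of CPUID leaf 7, sub-leaf 0, which is where driver-i386.c (below) reads them from. A small stand-alone usage sketch, not part of the patch, relying only on the existing __get_cpuid_max and __cpuid_count helpers from cpuid.h together with the bits added above:

  #include <cpuid.h>
  #include <stdio.h>

  int
  main (void)
  {
    unsigned int eax, ebx, ecx, edx;

    if (__get_cpuid_max (0, 0) >= 7)
      {
        __cpuid_count (7, 0, eax, ebx, ecx, edx);
        printf ("AVX512F:  %d\n", (ebx & bit_AVX512F) != 0);
        printf ("AVX512CD: %d\n", (ebx & bit_AVX512CD) != 0);
        printf ("AVX512ER: %d\n", (ebx & bit_AVX512ER) != 0);
        printf ("AVX512PF: %d\n", (ebx & bit_AVX512PF) != 0);
      }
    return 0;
  }
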
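Beside this run-time detection, i386-c.c (further below in this patch) adds the matching compile-time predefines, so source code can select AVX-512 paths the same way it already does with __AVX__ and __AVX2__. A trivial usage sketch (the function and its messages are illustrative; the -mavx512* option names match what driver-i386.c now emits):

  #include <stdio.h>

  void
  report_avx512_isa (void)
  {
  #ifdef __AVX512F__
    puts ("built with -mavx512f (or a setting that implies it)");
  #endif
  #ifdef __AVX512CD__
    puts ("built with -mavx512cd");
  #endif
  #ifdef __AVX512ER__
    puts ("built with -mavx512er");
  #endif
  #ifdef __AVX512PF__
    puts ("built with -mavx512pf");
  #endif
  }
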
*/ #undef WCHAR_TYPE_SIZE diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index c8b71c8edf9..4cb9907b5ed 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -390,6 +390,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; + unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0; + unsigned int has_avx512f = 0; bool arch; @@ -461,6 +463,10 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_fsgsbase = ebx & bit_FSGSBASE; has_rdseed = ebx & bit_RDSEED; has_adx = ebx & bit_ADX; + has_avx512f = ebx & bit_AVX512F; + has_avx512er = ebx & bit_AVX512ER; + has_avx512pf = ebx & bit_AVX512PF; + has_avx512cd = ebx & bit_AVX512CD; } if (max_level >= 13) @@ -638,13 +644,18 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Atom. */ cpu = "atom"; break; + case 0x0f: + /* Merom. */ + case 0x17: + case 0x1d: + /* Penryn. */ + cpu = "core2"; + break; case 0x1a: case 0x1e: case 0x1f: case 0x2e: /* Nehalem. */ - cpu = "corei7"; - break; case 0x25: case 0x2c: case 0x2f: @@ -656,20 +667,25 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Sandy Bridge. */ cpu = "corei7-avx"; break; - case 0x17: - case 0x1d: - /* Penryn. */ - cpu = "core2"; + case 0x3a: + case 0x3e: + /* Ivy Bridge. */ + cpu = "core-avx-i"; break; - case 0x0f: - /* Merom. */ - cpu = "core2"; + case 0x3c: + case 0x45: + case 0x46: + /* Haswell. */ + cpu = "core-avx2"; break; default: if (arch) { /* This is unknown family 0x6 CPU. */ - if (has_avx) + if (has_avx2) + /* Assume Haswell. */ + cpu = "core-avx2"; + else if (has_avx) /* Assume Sandy Bridge. */ cpu = "corei7-avx"; else if (has_sse4_2) @@ -828,13 +844,18 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; + const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f"; + const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er"; + const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd"; + const char *avx512pf = has_avx512pf ? 
" -mavx512pf" : " -mno-avx512pf"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, - fxsr, xsave, xsaveopt, NULL); + fxsr, xsave, xsaveopt, avx512f, avx512er, + avx512cd, avx512pf, NULL); } done: diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 31dd28a94cb..14349be0af5 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -306,6 +306,14 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AVX__"); if (isa_flag & OPTION_MASK_ISA_AVX2) def_or_undef (parse_in, "__AVX2__"); + if (isa_flag & OPTION_MASK_ISA_AVX512F) + def_or_undef (parse_in, "__AVX512F__"); + if (isa_flag & OPTION_MASK_ISA_AVX512ER) + def_or_undef (parse_in, "__AVX512ER__"); + if (isa_flag & OPTION_MASK_ISA_AVX512CD) + def_or_undef (parse_in, "__AVX512CD__"); + if (isa_flag & OPTION_MASK_ISA_AVX512PF) + def_or_undef (parse_in, "__AVX512PF__"); if (isa_flag & OPTION_MASK_ISA_FMA) def_or_undef (parse_in, "__FMA__"); if (isa_flag & OPTION_MASK_ISA_RTM) diff --git a/gcc/config/i386/i386-interix.h b/gcc/config/i386/i386-interix.h index c74e008b0d9..b99f4d9b908 100644 --- a/gcc/config/i386/i386-interix.h +++ b/gcc/config/i386/i386-interix.h @@ -143,6 +143,9 @@ do { \ #undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE #define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* The following are needed for us to be able to use winnt.c, but are not otherwise meaningful to Interix. (The functions that use these are never called because we don't do DLLs.) */ diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index 393cd4a23be..e0b8fc826ab 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -76,16 +76,19 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */ VECTOR_MODES (INT, 64); /* V64QI V32HI V16SI V8DI */ +VECTOR_MODES (INT, 128); /* V128QI V64HI V32SI V16DI */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF */ +VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF */ VECTOR_MODE (INT, TI, 1); /* V1TI */ VECTOR_MODE (INT, DI, 1); /* V1DI */ VECTOR_MODE (INT, SI, 1); /* V1SI */ VECTOR_MODE (INT, QI, 2); /* V2QI */ INT_MODE (OI, 32); +INT_MODE (XI, 64); /* The symbol Pmode stands for one of the above machine modes (usually SImode). The tm.h file specifies which one. It is not a distinct mode. */ diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index bea1c257830..5fcbd6b5776 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -28,15 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Algorithm to expand string function with. */ enum stringop_alg { - no_stringop, - libcall, - rep_prefix_1_byte, - rep_prefix_4_byte, - rep_prefix_8_byte, - loop_1_byte, - loop, - unrolled_loop, - vector_loop +#undef DEF_ENUM +#define DEF_ENUM + +#undef DEF_ALG +#define DEF_ALG(alg, name) alg, + +#include "stringop.def" +last_alg + +#undef DEF_ENUM +#undef DEF_ALG }; /* Available call abi. 
*/ diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 09667893910..3ab2f3a2ac8 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -173,6 +173,8 @@ extern int ix86_mode_after (int, int, rtx); extern int ix86_mode_entry (int); extern int ix86_mode_exit (int); +extern bool ix86_libc_has_function (enum function_class fn_class); + #ifdef HARD_CONST extern void ix86_emit_mode_set (int, int, HARD_REG_SET); #endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0c546af00b2..e2fa71a369a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -62,6 +62,8 @@ along with GCC; see the file COPYING3. If not see #include "dumpfile.h" #include "tree-pass.h" #include "tree-flow.h" +#include "context.h" +#include "pass_manager.h" static rtx legitimize_dllimport_symbol (rtx, bool); static rtx legitimize_pe_coff_extern_decl (rtx, bool); @@ -85,6 +87,13 @@ static rtx legitimize_pe_coff_symbol (rtx, bool); #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}} +static stringop_algs ix86_size_memcpy[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; +static stringop_algs ix86_size_memset[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; + const struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of an add instruction */ @@ -138,10 +147,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FABS instruction. */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}, - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}, + ix86_size_memcpy, + ix86_size_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -156,6 +163,13 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ }; /* Processor costs (relative to an add) */ +static stringop_algs i386_memcpy[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs i386_memset[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -209,10 +223,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (22), /* cost of FABS instruction. */ COSTS_N_INSNS (24), /* cost of FCHS instruction. */ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, - DUMMY_STRINGOP_ALGS}, + i386_memcpy, + i386_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -226,6 +238,13 @@ struct processor_costs i386_cost = { /* 386 specific costs */ 1, /* cond_not_taken_branch_cost. 
*/ }; +static stringop_algs i486_memcpy[2] = { + {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs i486_memset[2] = { + {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -281,10 +300,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (3), /* cost of FABS instruction. */ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ - {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, + i486_memcpy, + i486_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -298,6 +315,13 @@ struct processor_costs i486_cost = { /* 486 specific costs */ 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs pentium_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentium_memset[2] = { + {libcall, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs pentium_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -351,10 +375,8 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (1), /* cost of FABS instruction. */ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, + pentium_memcpy, + pentium_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -368,6 +390,21 @@ struct processor_costs pentium_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes + (we ensure the alignment). For small blocks inline loop is still a + noticeable win, for bigger blocks either rep movsl or rep movsb is + way to go. Rep movsb has apparently more expensive startup time in CPU, + but after 4K the difference is down in the noise. */ +static stringop_algs pentiumpro_memcpy[2] = { + {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentiumpro_memset[2] = { + {rep_prefix_4_byte, {{1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -421,19 +458,8 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ - /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes - (we ensure the alignment). For small blocks inline loop is still a - noticeable win, for bigger blocks either rep movsl or rep movsb is - way to go. Rep movsb has apparently more expensive startup time in CPU, - but after 4K the difference is down in the noise. 
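Each of these stringop_algs initializers has the same shape: the leading element names the algorithm for blocks of unknown size, and each following {max, alg, noalign} entry covers constant block sizes up to max, with max == -1 ending the list; the two elements of every *_memcpy[2] / *_memset[2] pair are assumed to be selected by whether 64-bit code is being generated. A hedged sketch of the lookup, assuming the stringop_algs layout from i386.h (the real decision logic is decide_alg in i386.c and weighs more factors):

  static enum stringop_alg
  pick_stringop_alg (const struct stringop_algs *algs,
                     bool size_known, HOST_WIDE_INT size)
  {
    if (!size_known)
      return algs->unknown_size;

    for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
      if (algs->size[i].max == -1 || size <= algs->size[i].max)
        return algs->size[i].alg;

    return libcall;  /* not reached when the table ends with max == -1 */
  }
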
*/ - {{rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, - {8192, rep_prefix_4_byte, false}, - {-1, rep_prefix_1_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_4_byte, {{1024, unrolled_loop, false}, - {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + pentiumpro_memcpy, + pentiumpro_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -447,6 +473,12 @@ struct processor_costs pentiumpro_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs geode_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs geode_memset[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs geode_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -501,10 +533,8 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (1), /* cost of FABS instruction. */ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + geode_memcpy, + geode_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -518,6 +548,12 @@ struct processor_costs geode_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs k6_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs k6_memset[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs k6_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -574,10 +610,8 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + k6_memcpy, + k6_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -591,6 +625,15 @@ struct processor_costs k6_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* For some reason, Athlon deals better with REP prefix (relative to loops) + compared to K8. Alignment becomes important after 8 bytes for memcpy and + 128 bytes for memset. */ +static stringop_algs athlon_memcpy[2] = { + {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs athlon_memset[2] = { + {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs athlon_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -644,13 +687,8 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* For some reason, Athlon deals better with REP prefix (relative to loops) - compared to K8. Alignment becomes important after 8 bytes for memcpy and - 128 bytes for memset. 
*/ - {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + athlon_memcpy, + athlon_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -664,6 +702,19 @@ struct processor_costs athlon_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* K8 has optimized REP instruction for medium sized blocks, but for very + small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. */ +static stringop_algs k8_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs k8_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs k8_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -722,17 +773,9 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* K8 has optimized REP instruction for medium sized blocks, but for very - small blocks it is better to use loop. For large blocks, libcall can - do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + + k8_memcpy, + k8_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -746,6 +789,19 @@ struct processor_costs k8_cost = { 2, /* cond_not_taken_branch_cost. */ }; +/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. */ +static stringop_algs amdfam10_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs amdfam10_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; struct processor_costs amdfam10_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ @@ -812,17 +868,8 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall can - do nontemporary accesses and beat inline considerably. 
*/ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + amdfam10_memcpy, + amdfam10_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -836,7 +883,21 @@ struct processor_costs amdfam10_cost = { 1, /* cond_not_taken_branch_cost. */ }; -struct processor_costs bdver1_cost = { +/* BDVER1 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. */ +static stringop_algs bdver1_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver1_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; + +const struct processor_costs bdver1_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -902,17 +963,8 @@ struct processor_costs bdver1_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ - /* BDVER1 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall - can do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + bdver1_memcpy, + bdver1_memset, 6, /* scalar_stmt_cost. */ 4, /* scalar load_cost. */ 4, /* scalar_store_cost. */ @@ -926,7 +978,22 @@ struct processor_costs bdver1_cost = { 1, /* cond_not_taken_branch_cost. */ }; -struct processor_costs bdver2_cost = { +/* BDVER2 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. 
*/ + +static stringop_algs bdver2_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver2_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; + +const struct processor_costs bdver2_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -992,17 +1059,8 @@ struct processor_costs bdver2_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ - /* BDVER2 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall - can do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + bdver2_memcpy, + bdver2_memset, 6, /* scalar_stmt_cost. */ 4, /* scalar load_cost. */ 4, /* scalar_store_cost. */ @@ -1016,6 +1074,20 @@ struct processor_costs bdver2_cost = { 1, /* cond_not_taken_branch_cost. */ }; + + /* BDVER3 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. */ +static stringop_algs bdver3_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver3_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; struct processor_costs bdver3_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (1), /* cost of a lea instruction */ @@ -1074,17 +1146,8 @@ struct processor_costs bdver3_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ - /* BDVER3 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall - can do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + bdver3_memcpy, + bdver3_memset, 6, /* scalar_stmt_cost. */ 4, /* scalar load_cost. */ 4, /* scalar_store_cost. */ @@ -1098,7 +1161,20 @@ struct processor_costs bdver3_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; -struct processor_costs btver1_cost = { + /* BTVER1 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. */ +static stringop_algs btver1_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs btver1_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +const struct processor_costs btver1_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1159,17 +1235,8 @@ struct processor_costs btver1_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* BTVER1 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall can - do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + btver1_memcpy, + btver1_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -1183,7 +1250,17 @@ struct processor_costs btver1_cost = { 1, /* cond_not_taken_branch_cost. */ }; -struct processor_costs btver2_cost = { +static stringop_algs btver2_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs btver2_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +const struct processor_costs btver2_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ COSTS_N_INSNS (2), /* cost of a lea instruction */ COSTS_N_INSNS (1), /* variable shift costs */ @@ -1243,15 +1320,8 @@ struct processor_costs btver2_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - - {{libcall, {{6, loop, false}, {14, unrolled_loop, false}, - {-1, rep_prefix_4_byte, false}}}, - {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {24, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + btver2_memcpy, + btver2_memset, 4, /* scalar_stmt_cost. */ 2, /* scalar load_cost. */ 2, /* scalar_store_cost. */ @@ -1265,6 +1335,14 @@ struct processor_costs btver2_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +static stringop_algs pentium4_memcpy[2] = { + {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentium4_memset[2] = { + {libcall, {{6, loop_1_byte, false}, {48, loop, false}, + {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; + static const struct processor_costs pentium4_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1318,11 +1396,8 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (2), /* cost of FABS instruction. */ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ - {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{6, loop_1_byte, false}, {48, loop, false}, - {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + pentium4_memcpy, + pentium4_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1336,6 +1411,17 @@ struct processor_costs pentium4_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs nocona_memcpy[2] = { + {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false}, + {100000, unrolled_loop, false}, {-1, libcall, false}}}}; + +static stringop_algs nocona_memset[2] = { + {libcall, {{6, loop_1_byte, false}, {48, loop, false}, + {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {64, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; + static const struct processor_costs nocona_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1389,13 +1475,8 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (3), /* cost of FABS instruction. */ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ - {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false}, - {100000, unrolled_loop, false}, {-1, libcall, false}}}}, - {{libcall, {{6, loop_1_byte, false}, {48, loop, false}, - {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {64, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + nocona_memcpy, + nocona_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1409,6 +1490,15 @@ struct processor_costs nocona_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs atom_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs atom_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs atom_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1462,13 +1552,8 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. 
*/ - {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + atom_memcpy, + atom_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1482,6 +1567,15 @@ struct processor_costs atom_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static stringop_algs slm_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs slm_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; static const struct processor_costs slm_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1535,13 +1629,8 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, - {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + slm_memcpy, + slm_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1556,6 +1645,15 @@ struct processor_costs slm_cost = { }; /* Generic64 should produce code tuned for Nocona and K8. */ + +static stringop_algs generic64_memcpy[2] = { + DUMMY_STRINGOP_ALGS, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs generic64_memset[2] = { + DUMMY_STRINGOP_ALGS, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; static const struct processor_costs generic64_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1615,12 +1713,8 @@ struct processor_costs generic64_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {DUMMY_STRINGOP_ALGS, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, - {DUMMY_STRINGOP_ALGS, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}, + generic64_memcpy, + generic64_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1635,6 +1729,18 @@ struct processor_costs generic64_cost = { }; /* core_cost should produce code tuned for Core familly of CPUs. 
*/ +static stringop_algs core_memcpy[2] = { + {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, + {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true}, + {-1, libcall, false}}}}; +static stringop_algs core_memset[2] = { + {libcall, {{6, loop_1_byte, true}, + {24, loop, true}, + {8192, rep_prefix_4_byte, true}, + {-1, libcall, false}}}, + {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, + {-1, libcall, false}}}}; + static const struct processor_costs core_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1693,15 +1799,8 @@ struct processor_costs core_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, - {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true}, - {-1, libcall, false}}}}, - {{libcall, {{6, loop_1_byte, true}, - {24, loop, true}, - {8192, rep_prefix_4_byte, true}, - {-1, libcall, false}}}, - {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, - {-1, libcall, false}}}}, + core_memcpy, + core_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1717,6 +1816,14 @@ struct processor_costs core_cost = { /* Generic32 should produce code tuned for PPro, Pentium4, Nocona, Athlon and K8. */ +static stringop_algs generic32_memcpy[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs generic32_memset[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; static const struct processor_costs generic32_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -1770,12 +1877,8 @@ struct processor_costs generic32_cost = { COSTS_N_INSNS (8), /* cost of FABS instruction. */ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - DUMMY_STRINGOP_ALGS}, + generic32_memcpy, + generic32_memset, 1, /* scalar_stmt_cost. */ 1, /* scalar load_cost. */ 1, /* scalar_store_cost. */ @@ -1833,287 +1936,23 @@ const struct processor_costs *ix86_cost = &pentium_cost; (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */ #define m_GENERIC (m_GENERIC32 | m_GENERIC64) +const char* ix86_tune_feature_names[X86_TUNE_LAST] = { +#undef DEF_TUNE +#define DEF_TUNE(tune, name, selector) name, +#include "x86-tune.def" +#undef DEF_TUNE +}; + /* Feature tests against the various tunings. */ unsigned char ix86_tune_features[X86_TUNE_LAST]; /* Feature tests against the various tunings used to create ix86_tune_features based on the processor mask. */ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { - /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results - negatively, so enabling for Generic64 seems like good code size - tradeoff. We can't enable it for 32bit generic because it does not - work well with PPro base chips. 
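The long hand-maintained initializer that the rest of this hunk deletes is now generated from x86-tune.def, so the printable feature names and the selector masks cannot drift apart. A self-contained toy illustration of the DEF_TUNE(tune, name, selector) expansion pattern (every identifier below is invented for the demo; the real entries live in config/i386/x86-tune.def):

  #include <stdio.h>

  /* Stand-ins for the real m_* CPU masks.  */
  #define m_DEMO_A (1 << 0)
  #define m_DEMO_B (1 << 1)

  /* What a .def file boils down to: one DEF_TUNE per feature.  */
  #define DEMO_TUNE_DEFS \
    DEF_TUNE (DEMO_TUNE_FOO, "foo", m_DEMO_A) \
    DEF_TUNE (DEMO_TUNE_BAR, "bar", m_DEMO_A | m_DEMO_B)

  /* Expansion 1: the enum of feature indices.  */
  #define DEF_TUNE(tune, name, selector) tune,
  enum demo_tune { DEMO_TUNE_DEFS DEMO_TUNE_LAST };
  #undef DEF_TUNE

  /* Expansion 2: the parallel name table.  */
  #define DEF_TUNE(tune, name, selector) name,
  static const char *const demo_tune_names[] = { DEMO_TUNE_DEFS };
  #undef DEF_TUNE

  /* Expansion 3: the parallel selector-mask table.  */
  #define DEF_TUNE(tune, name, selector) selector,
  static const unsigned int demo_tune_selectors[] = { DEMO_TUNE_DEFS };
  #undef DEF_TUNE

  int
  main (void)
  {
    for (int i = 0; i < DEMO_TUNE_LAST; i++)
      printf ("%s: selector mask %#x\n",
              demo_tune_names[i], demo_tune_selectors[i]);
    return 0;
  }
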
*/ - m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64, - - /* X86_TUNE_PUSH_MEMORY */ - m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_ZERO_EXTEND_WITH_AND */ - m_486 | m_PENT, - - /* X86_TUNE_UNROLL_STRLEN */ - m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based - on simulation result. But after P4 was made, no performance benefit - was observed with branch hints. It also increases the code size. - As a result, icc never generates branch hints. */ - 0, - - /* X86_TUNE_DOUBLE_WITH_ADD */ - ~m_386, - - /* X86_TUNE_USE_SAHF */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC, - - /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid - partial dependencies. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial - register stalls on Generic32 compilation setting as well. However - in current implementation the partial register stalls are not eliminated - very well - they can be introduced via subregs synthesized by combine - and can happen in caller/callee saving sequences. Because this option - pays back little on PPro based chips and is in conflict with partial reg - dependencies used by Athlon/P4 based chips, it is better to leave it off - for generic32 for now. */ - m_PPRO, - - /* X86_TUNE_PARTIAL_FLAG_REG_STALL */ - m_CORE_ALL | m_GENERIC, - - /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall - * on 16-bit immediate moves into memory on Core2 and Corei7. */ - m_CORE_ALL | m_GENERIC, - - /* X86_TUNE_USE_HIMODE_FIOP */ - m_386 | m_486 | m_K6_GEODE, - - /* X86_TUNE_USE_SIMODE_FIOP */ - ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC), - - /* X86_TUNE_USE_MOV0 */ - m_K6, - - /* X86_TUNE_USE_CLTD */ - ~(m_PENT | m_ATOM | m_SLM | m_K6), - - /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ - m_PENT4, - - /* X86_TUNE_SPLIT_LONG_MOVES */ - m_PPRO, - - /* X86_TUNE_READ_MODIFY_WRITE */ - ~m_PENT, - - /* X86_TUNE_READ_MODIFY */ - ~(m_PENT | m_PPRO), - - /* X86_TUNE_PROMOTE_QIMODE */ - m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_FAST_PREFIX */ - ~(m_386 | m_486 | m_PENT), - - /* X86_TUNE_SINGLE_STRINGOP */ - m_386 | m_P4_NOCONA, - - /* X86_TUNE_QIMODE_MATH */ - ~0, - - /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial - register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option - might be considered for Generic32 if our scheme for avoiding partial - stalls was more effective. */ - ~m_PPRO, - - /* X86_TUNE_PROMOTE_QI_REGS */ - 0, - - /* X86_TUNE_PROMOTE_HI_REGS */ - m_PPRO, - - /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred - over esp addition. */ - m_386 | m_486 | m_PENT | m_PPRO, - - /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred - over esp addition. */ - m_PENT, - - /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred - over esp subtraction. */ - m_386 | m_486 | m_PENT | m_K6_GEODE, - - /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred - over esp subtraction. 
*/ - m_PENT | m_K6_GEODE, - - /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred - for DFmode copies */ - ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC), - - /* X86_TUNE_PARTIAL_REG_DEPENDENCY */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a - conflict here in between PPro/Pentium4 based chips that thread 128bit - SSE registers as single units versus K8 based chips that divide SSE - registers to two 64bit halves. This knob promotes all store destinations - to be 128bit to allow register renaming on 128bit SSE units, but usually - results in one extra microop on 64bit SSE units. Experimental results - shows that disabling this option on P4 brings over 20% SPECfp regression, - while enabling it on K8 brings roughly 2.4% regression that can be partly - masked by careful scheduling of moves. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC, - - /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */ - m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM, - - /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */ - m_COREI7 | m_BDVER | m_SLM, - - /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */ - m_BDVER , - - /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies - are resolved on SSE register parts instead of whole registers, so we may - maintain just lower part of scalar values in proper format leaving the - upper part undefined. */ - m_ATHLON_K8, - - /* X86_TUNE_SSE_TYPELESS_STORES */ - m_AMD_MULTIPLE, - - /* X86_TUNE_SSE_LOAD0_BY_PXOR */ - m_PPRO | m_P4_NOCONA, - - /* X86_TUNE_MEMORY_MISMATCH_STALL */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_PROLOGUE_USING_MOVE */ - m_PPRO | m_ATHLON_K8, - - /* X86_TUNE_EPILOGUE_USING_MOVE */ - m_PPRO | m_ATHLON_K8, - - /* X86_TUNE_SHIFT1 */ - ~m_486, - - /* X86_TUNE_USE_FFREEP */ - m_AMD_MULTIPLE, - - /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC */ - ~(m_AMD_MULTIPLE | m_GENERIC), - - /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC */ - ~m_ATHLON_K8, - - /* X86_TUNE_INTER_UNIT_CONVERSIONS */ - ~(m_AMDFAM10 | m_BDVER ), - - /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more - than 4 branch instructions in the 16 byte window. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_SCHEDULE */ - m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_USE_BT */ - m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_USE_INCDEC */ - ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC), - - /* X86_TUNE_PAD_RETURNS */ - m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC, - - /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */ - m_ATOM, - - /* X86_TUNE_EXT_80387_CONSTANTS */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC, - - /* X86_TUNE_AVOID_VECTOR_DECODE */ - m_CORE_ALL | m_K8 | m_GENERIC64, - - /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode - and SImode multiply, but 386 and 486 do HImode multiply faster. */ - ~(m_386 | m_486), - - /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is - vector path on AMD machines. */ - m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64, - - /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD - machines. 
*/ - m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64, - - /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR - than a MOV. */ - m_PENT, - - /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is, - but one byte longer. */ - m_PENT, - - /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory - operand that cannot be represented using a modRM byte. The XOR - replacement is long decoded, so this split helps here as well. */ - m_K6, - - /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion - from FP to FP. */ - m_CORE_ALL | m_AMDFAM10 | m_GENERIC, - - /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion - from integer to FP. */ - m_AMDFAM10, - - /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction - with a subsequent conditional jump instruction into a single - compare-and-branch uop. */ - m_BDVER, - - /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag - will impact LEA instruction selection. */ - m_ATOM | m_SLM, - - /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector - instructions. */ - ~m_ATOM, - - /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching - at -O3. For the moment, the prefetching seems badly tuned for Intel - chips. */ - m_K6_GEODE | m_AMD_MULTIPLE, - - /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for - the auto-vectorizer. */ - m_BDVER | m_BTVER2, - - /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations - during reassociation of integer computation. */ - m_ATOM, - - /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations - during reassociation of fp computation. */ - m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2, - - /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE - regs instead of memory. */ - m_CORE_ALL, - - /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for - a conditional move. */ - m_ATOM, - - /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for - fp converts to destination register. */ - m_SLM - +#undef DEF_TUNE +#define DEF_TUNE(tune, name, selector) selector, +#include "x86-tune.def" +#undef DEF_TUNE }; /* Feature tests against the various architecture variations. */ @@ -2188,6 +2027,11 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = /* SSE REX registers */ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, + /* AVX-512 SSE registers */ + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, + EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, }; /* The "default" register map used in 32bit mode. */ @@ -2201,6 +2045,8 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/ }; /* The "default" register map used in 64bit mode. 
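(Aside: the DEF_TUNE conversion above, and the matching enum change in i386.h further down, is the classic X-macro idiom. One list of entries, x86-tune.def, is expanded under different definitions of DEF_TUNE so that the tuning enum, the user-visible name table and the per-processor selector table can never drift out of sync. The sketch below is a minimal, self-contained illustration of the idiom only; it collapses the separate .def file into a TUNE_LIST macro so it compiles as one translation unit, and every identifier in it is made up.)

#include <stdio.h>

/* Stand-in for x86-tune.def: one X (id, name, selector) line per feature.  */
#define TUNE_LIST(X)            \
  X (TUNE_FOO, "foo", 0x1)      \
  X (TUNE_BAR, "bar", 0x6)

/* Expansion 1: the indices (cf. ix86_tune_indices in i386.h).  */
enum tune_index
{
#define DEF_TUNE(id, name, selector) id,
  TUNE_LIST (DEF_TUNE)
#undef DEF_TUNE
  TUNE_LAST
};

/* Expansion 2: the user-visible names (cf. ix86_tune_feature_names).  */
static const char *const tune_names[TUNE_LAST] = {
#define DEF_TUNE(id, name, selector) name,
  TUNE_LIST (DEF_TUNE)
#undef DEF_TUNE
};

/* Expansion 3: the per-processor masks (cf. initial_ix86_tune_features).  */
static const unsigned int tune_selectors[TUNE_LAST] = {
#define DEF_TUNE(id, name, selector) selector,
  TUNE_LIST (DEF_TUNE)
#undef DEF_TUNE
};

int
main (void)
{
  int i;
  for (i = 0; i < TUNE_LAST; i++)
    printf ("%s: %#x\n", tune_names[i], tune_selectors[i]);
  return 0;
}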
*/ @@ -2214,6 +2060,8 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 8,9,10,11,12,13,14,15, /* extended integer registers */ 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ + 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */ + 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */ }; /* Define the register numbers to be used in Dwarf debugging information. @@ -2279,6 +2127,8 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/ + -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/ }; /* Define parameter passing and return registers. */ @@ -2471,7 +2321,6 @@ enum ix86_function_specific_strings static char *ix86_target_string (HOST_WIDE_INT, int, const char *, const char *, enum fpmath_unit, bool); -static void ix86_debug_options (void) ATTRIBUTE_UNUSED; static void ix86_function_specific_save (struct cl_target_option *); static void ix86_function_specific_restore (struct cl_target_option *); static void ix86_function_specific_print (FILE *, int, @@ -2578,7 +2427,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = static bool gate_insert_vzeroupper (void) { - return TARGET_AVX && TARGET_VZEROUPPER; + return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER; } static unsigned int @@ -2596,31 +2445,48 @@ rest_of_handle_insert_vzeroupper (void) ix86_optimize_mode_switching[AVX_U128] = 1; /* Call optimize_mode_switching. */ - pass_mode_switching.pass.execute (); + g->get_passes ()->execute_pass_mode_switching (); return 0; } -struct rtl_opt_pass pass_insert_vzeroupper = +namespace { + +const pass_data pass_data_insert_vzeroupper = { - { - RTL_PASS, - "vzeroupper", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - gate_insert_vzeroupper, /* gate */ - rest_of_handle_insert_vzeroupper, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_NONE, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing | - 0, /* todo_flags_finish */ - } + RTL_PASS, /* type */ + "vzeroupper", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ }; +class pass_insert_vzeroupper : public rtl_opt_pass +{ +public: + pass_insert_vzeroupper(gcc::context *ctxt) + : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_insert_vzeroupper (); } + unsigned int execute () { return rest_of_handle_insert_vzeroupper (); } + +}; // class pass_insert_vzeroupper + +} // anon namespace + +rtl_opt_pass * +make_pass_insert_vzeroupper (gcc::context *ctxt) +{ + return new pass_insert_vzeroupper (ctxt); +} + /* Return true if a red-zone is in use. 
*/ static inline bool @@ -2651,6 +2517,10 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mfma", OPTION_MASK_ISA_FMA }, { "-mxop", OPTION_MASK_ISA_XOP }, { "-mlwp", OPTION_MASK_ISA_LWP }, + { "-mavx512f", OPTION_MASK_ISA_AVX512F }, + { "-mavx512er", OPTION_MASK_ISA_AVX512ER }, + { "-mavx512cd", OPTION_MASK_ISA_AVX512CD }, + { "-mavx512pf", OPTION_MASK_ISA_AVX512PF }, { "-msse4a", OPTION_MASK_ISA_SSE4A }, { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, @@ -2883,7 +2753,7 @@ ix86_profile_before_prologue (void) /* Function that is callable from the debugger to print the current options. */ -void +void ATTRIBUTE_UNUSED ix86_debug_options (void) { char *opts = ix86_target_string (ix86_isa_flags, target_flags, @@ -2900,7 +2770,222 @@ ix86_debug_options (void) return; } + +static const char *stringop_alg_names[] = { +#define DEF_ENUM +#define DEF_ALG(alg, name) #name, +#include "stringop.def" +#undef DEF_ENUM +#undef DEF_ALG +}; + +/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. + The string is of the following form (or comma separated list of it): + + strategy_alg:max_size:[align|noalign] + + where the full size range for the strategy is either [0, max_size] or + [min_size, max_size], in which min_size is the max_size + 1 of the + preceding range. The last size range must have max_size == -1. + + Examples: + + 1. + -mmemcpy-strategy=libcall:-1:noalign + + this is equivalent to (for known size memcpy) -mstringop-strategy=libcall + + + 2. + -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign + + This is to tell the compiler to use the following strategy for memset + 1) when the expected size is between [1, 16], use rep_8byte strategy; + 2) when the size is between [17, 2048], use vector_loop; + 3) when the size is > 2048, use libcall. */ + +struct stringop_size_range +{ + int max; + stringop_alg alg; + bool noalign; +}; + +static void +ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) +{ + const struct stringop_algs *default_algs; + stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; + char *curr_range_str, *next_range_str; + int i = 0, n = 0; + + if (is_memset) + default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; + else + default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; + + curr_range_str = strategy_str; + + do + { + int maxs; + stringop_alg alg; + char alg_name[128]; + char align[16]; + next_range_str = strchr (curr_range_str, ','); + if (next_range_str) + *next_range_str++ = '\0'; + + if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s", + alg_name, &maxs, align)) + { + error ("wrong arg %s to option %s", curr_range_str, + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) + { + error ("size ranges of option %s should be increasing", + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + for (i = 0; i < last_alg; i++) + { + if (!strcmp (alg_name, stringop_alg_names[i])) + { + alg = (stringop_alg) i; + break; + } + } + + if (i == last_alg) + { + error ("wrong stringop strategy name %s specified for option %s", + alg_name, + is_memset ? 
"-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + input_ranges[n].max = maxs; + input_ranges[n].alg = alg; + if (!strcmp (align, "align")) + input_ranges[n].noalign = false; + else if (!strcmp (align, "noalign")) + input_ranges[n].noalign = true; + else + { + error ("unknown alignment %s specified for option %s", + align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + n++; + curr_range_str = next_range_str; + } + while (curr_range_str); + + if (input_ranges[n - 1].max != -1) + { + error ("the max value for the last size range should be -1" + " for option %s", + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + if (n > MAX_STRINGOP_ALGS) + { + error ("too many size ranges specified in option %s", + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + /* Now override the default algs array. */ + for (i = 0; i < n; i++) + { + *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max; + *const_cast<stringop_alg *>(&default_algs->size[i].alg) + = input_ranges[i].alg; + *const_cast<int *>(&default_algs->size[i].noalign) + = input_ranges[i].noalign; + } +} + +/* parse -mtune-ctrl= option. When DUMP is true, + print the features that are explicitly set. */ + +static void +parse_mtune_ctrl_str (bool dump) +{ + if (!ix86_tune_ctrl_string) + return; + + char *next_feature_string = NULL; + char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); + char *orig = curr_feature_string; + int i; + do + { + bool clear = false; + + next_feature_string = strchr (curr_feature_string, ','); + if (next_feature_string) + *next_feature_string++ = '\0'; + if (*curr_feature_string == '^') + { + curr_feature_string++; + clear = true; + } + for (i = 0; i < X86_TUNE_LAST; i++) + { + if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) + { + ix86_tune_features[i] = !clear; + if (dump) + fprintf (stderr, "Explicitly %s feature %s\n", + clear ? "clear" : "set", ix86_tune_feature_names[i]); + break; + } + } + if (i == X86_TUNE_LAST) + error ("Unknown parameter to option -mtune-ctrl: %s", + clear ? curr_feature_string - 1 : curr_feature_string); + curr_feature_string = next_feature_string; + } + while (curr_feature_string); + free (orig); +} + +/* Helper function to set ix86_tune_features. IX86_TUNE is the + processor type. */ + +static void +set_ix86_tune_features (enum processor_type ix86_tune, bool dump) +{ + unsigned int ix86_tune_mask = 1u << ix86_tune; + int i; + + for (i = 0; i < X86_TUNE_LAST; ++i) + { + if (ix86_tune_no_default) + ix86_tune_features[i] = 0; + else + ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); + } + + if (dump) + { + fprintf (stderr, "List of x86 specific tuning parameter names:\n"); + for (i = 0; i < X86_TUNE_LAST; i++) + fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], + ix86_tune_features[i] ? "on" : "off"); + } + + parse_mtune_ctrl_str (dump); +} + + /* Override various settings based on options. If MAIN_ARGS_P, the options are from the command line, otherwise they are from attributes. 
*/ @@ -2955,6 +3040,10 @@ ix86_option_override_internal (bool main_args_p) #define PTA_FXSR (HOST_WIDE_INT_1 << 37) #define PTA_XSAVE (HOST_WIDE_INT_1 << 38) #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39) +#define PTA_AVX512F (HOST_WIDE_INT_1 << 40) +#define PTA_AVX512ER (HOST_WIDE_INT_1 << 41) +#define PTA_AVX512PF (HOST_WIDE_INT_1 << 42) +#define PTA_AVX512CD (HOST_WIDE_INT_1 << 43) /* if this reaches 64, need to widen struct pta flags below */ @@ -3476,6 +3565,18 @@ ix86_option_override_internal (bool main_args_p) if (processor_alias_table[i].flags & PTA_XSAVEOPT && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; + if (processor_alias_table[i].flags & PTA_AVX512F + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512F; + if (processor_alias_table[i].flags & PTA_AVX512ER + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; + if (processor_alias_table[i].flags & PTA_AVX512PF + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF; + if (processor_alias_table[i].flags & PTA_AVX512CD + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD)) + ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) x86_prefetch_sse = true; @@ -3546,9 +3647,7 @@ ix86_option_override_internal (bool main_args_p) error ("bad value (%s) for %stune=%s %s", ix86_tune_string, prefix, suffix, sw); - ix86_tune_mask = 1u << ix86_tune; - for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); + set_ix86_tune_features (ix86_tune, ix86_dump_tunes); #ifndef USE_IX86_FRAME_POINTER #define USE_IX86_FRAME_POINTER 0 @@ -3784,6 +3883,7 @@ ix86_option_override_internal (bool main_args_p) gcc_unreachable (); } + ix86_tune_mask = 1u << ix86_tune; if ((!USE_IX86_FRAME_POINTER || (x86_accumulate_outgoing_args & ix86_tune_mask)) && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) @@ -3866,24 +3966,19 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_leave = gen_leave_rex64; if (Pmode == DImode) { - ix86_gen_monitor = gen_sse3_monitor64_di; ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; ix86_gen_tls_local_dynamic_base_64 = gen_tls_local_dynamic_base_64_di; } else { - ix86_gen_monitor = gen_sse3_monitor64_si; ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; ix86_gen_tls_local_dynamic_base_64 = gen_tls_local_dynamic_base_64_si; } } else - { - ix86_gen_leave = gen_leave; - ix86_gen_monitor = gen_sse3_monitor; - } + ix86_gen_leave = gen_leave; if (Pmode == DImode) { @@ -3895,6 +3990,7 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; ix86_gen_probe_stack_range = gen_probe_stack_rangedi; + ix86_gen_monitor = gen_sse3_monitor_di; } else { @@ -3906,6 +4002,7 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; ix86_gen_probe_stack_range = gen_probe_stack_rangesi; + ix86_gen_monitor = gen_sse3_monitor_si; } #ifdef USE_IX86_CLD @@ -3940,22 +4037,22 @@ ix86_option_override_internal (bool main_args_p) TARGET_AVX with -fexpensive-optimizations and split 32-byte AVX unaligned load/store. 
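(Aside, for anyone exercising the new -mmemcpy-strategy= / -mmemset-strategy= options added above: the accepted value is a comma-separated list of alg:max_size:{align|noalign} triples with strictly increasing max_size and a final max_size of -1, exactly as documented in the comment before ix86_parse_stringop_strategy_string. The sketch below is a small standalone parse of the example string taken from that comment; it uses plain strtok/sscanf and none of GCC's option or error machinery, so it only illustrates the format, not the compiler's behaviour.)

#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* Example value for -mmemset-strategy= (taken from the comment above).  */
  char str[] = "rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign";
  int prev_max = 0;
  char *p;

  for (p = strtok (str, ","); p; p = strtok (NULL, ","))
    {
      char alg[32], align[16];
      int max;

      if (sscanf (p, "%31[^:]:%d:%15s", alg, &max, align) != 3)
        {
          fprintf (stderr, "malformed range: %s\n", p);
          return 1;
        }
      /* Each triple covers (previous max, max]; -1 means "no upper bound".  */
      if (max == -1)
        printf ("size > %d: %s (%s)\n", prev_max, alg, align);
      else
        printf ("size %d..%d: %s (%s)\n", prev_max + 1, max, alg, align);
      prev_max = max;
    }
  return 0;
}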
*/ if (!optimize_size) - { - if (flag_expensive_optimizations - && !(target_flags_explicit & MASK_VZEROUPPER)) + { + if (flag_expensive_optimizations + && !(target_flags_explicit & MASK_VZEROUPPER)) target_flags |= MASK_VZEROUPPER; - if ((x86_avx256_split_unaligned_load & ix86_tune_mask) - && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) + if ((x86_avx256_split_unaligned_load & ix86_tune_mask) + && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; - if ((x86_avx256_split_unaligned_store & ix86_tune_mask) - && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE)) + if ((x86_avx256_split_unaligned_store & ix86_tune_mask) + && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE)) target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; - /* Enable 128-bit AVX instruction generation - for the auto-vectorizer. */ - if (TARGET_AVX128_OPTIMAL - && !(target_flags_explicit & MASK_PREFER_AVX128)) + /* Enable 128-bit AVX instruction generation + for the auto-vectorizer. */ + if (TARGET_AVX128_OPTIMAL + && !(target_flags_explicit & MASK_PREFER_AVX128)) target_flags |= MASK_PREFER_AVX128; - } + } if (ix86_recip_name) { @@ -4021,6 +4118,21 @@ ix86_option_override_internal (bool main_args_p) /* Handle stack protector */ if (!global_options_set.x_ix86_stack_protector_guard) ix86_stack_protector_guard = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS; + + /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ + if (ix86_tune_memcpy_strategy) + { + char *str = xstrdup (ix86_tune_memcpy_strategy); + ix86_parse_stringop_strategy_string (str, false); + free (str); + } + + if (ix86_tune_memset_strategy) + { + char *str = xstrdup (ix86_tune_memset_strategy); + ix86_parse_stringop_strategy_string (str, true); + free (str); + } } /* Implement the TARGET_OPTION_OVERRIDE hook. */ @@ -4028,8 +4140,9 @@ ix86_option_override_internal (bool main_args_p) static void ix86_option_override (void) { + opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g); static struct register_pass_info insert_vzeroupper_info - = { &pass_insert_vzeroupper.pass, "reload", + = { pass_insert_vzeroupper, "reload", 1, PASS_POS_INSERT_AFTER }; @@ -4060,6 +4173,8 @@ ix86_conditional_register_usage (void) fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; } /* See the definition of CALL_USED_REGISTERS in i386.h. */ @@ -4100,6 +4215,11 @@ ix86_conditional_register_usage (void) for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + + /* If AVX512F is disabled, squash the registers. */ + if (! 
TARGET_AVX512F) + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; } @@ -4133,7 +4253,7 @@ ix86_function_specific_restore (struct cl_target_option *ptr) { enum processor_type old_tune = ix86_tune; enum processor_type old_arch = ix86_arch; - unsigned int ix86_arch_mask, ix86_tune_mask; + unsigned int ix86_arch_mask; int i; ix86_arch = (enum processor_type) ptr->arch; @@ -4157,12 +4277,7 @@ ix86_function_specific_restore (struct cl_target_option *ptr) /* Recreate the tune optimization tests */ if (old_tune != ix86_tune) - { - ix86_tune_mask = 1u << ix86_tune; - for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] - = !!(initial_ix86_tune_features[i] & ix86_tune_mask); - } + set_ix86_tune_features (ix86_tune, false); } /* Print the current options */ @@ -4244,6 +4359,10 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("aes", OPT_maes), IX86_ATTR_ISA ("avx", OPT_mavx), IX86_ATTR_ISA ("avx2", OPT_mavx2), + IX86_ATTR_ISA ("avx512f", OPT_mavx512f), + IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), + IX86_ATTR_ISA ("avx512er", OPT_mavx512er), + IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), IX86_ATTR_ISA ("mmx", OPT_mmmx), IX86_ATTR_ISA ("pclmul", OPT_mpclmul), IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), @@ -4728,10 +4847,7 @@ ix86_in_large_data_p (tree exp) RELOC indicates whether forming the initial value of DECL requires link-time relocations. */ -static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT) - ATTRIBUTE_UNUSED; - -static section * +ATTRIBUTE_UNUSED static section * x86_64_elf_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { @@ -4792,6 +4908,28 @@ x86_64_elf_select_section (tree decl, int reloc, return default_elf_select_section (decl, reloc, align); } +/* Select a set of attributes for section NAME based on the properties + of DECL and whether or not RELOC indicates that DECL's initializer + might contain runtime relocations. */ + +static unsigned int ATTRIBUTE_UNUSED +x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + + if (decl == NULL_TREE + && (strcmp (name, ".ldata.rel.ro") == 0 + || strcmp (name, ".ldata.rel.ro.local") == 0)) + flags |= SECTION_RELRO; + + if (strcmp (name, ".lbss") == 0 + || strncmp (name, ".lbss.", 5) == 0 + || strncmp (name, ".gnu.linkonce.lb.", 16) == 0) + flags |= SECTION_BSS; + + return flags; +} + /* Build up a unique section name, expressed as a STRING_CST node, and assign it to DECL_SECTION_NAME (decl). RELOC indicates whether the initial value of EXP requires @@ -5161,8 +5299,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, static tree ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED, tree args ATTRIBUTE_UNUSED, - int flags ATTRIBUTE_UNUSED, - bool *no_add_attrs) + int flags, bool *no_add_attrs) { tree alt; @@ -5651,6 +5788,14 @@ ix86_function_type_abi (const_tree fntype) return ix86_abi; } +/* We add this as a workaround in order to use libc_has_function + hook in i386.md. */ +bool +ix86_libc_has_function (enum function_class fn_class) +{ + return targetm.libc_has_function (fn_class); +} + static bool ix86_function_ms_hook_prologue (const_tree fn) { @@ -7078,8 +7223,7 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, appropriate for passing a pointer to that type. 
*/ static bool -ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, +ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode, const_tree type, bool named ATTRIBUTE_UNUSED) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); @@ -8562,6 +8706,10 @@ standard_sse_constant_opcode (rtx insn, rtx x) } case 2: + if (get_attr_mode (insn) == MODE_XI + || get_attr_mode (insn) == MODE_V8DF + || get_attr_mode (insn) == MODE_V16SF) + return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; if (TARGET_AVX) return "vpcmpeqd\t%0, %0, %0"; else @@ -8796,7 +8944,7 @@ ix86_code_end (void) /* Emit code for the SET_GOT patterns. */ const char * -output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) +output_set_got (rtx dest, rtx label) { rtx xops[3]; @@ -12900,6 +13048,14 @@ ix86_tls_get_addr (void) ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); } + if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) + { + rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), + UNSPEC_PLTOFF); + return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_CONST (Pmode, unspec)); + } + return ix86_tls_symbol; } @@ -13807,21 +13963,29 @@ ix86_delegitimize_address (rtx x) x = replace_equiv_address_nv (orig_x, x); return x; } - if (GET_CODE (x) != CONST - || GET_CODE (XEXP (x, 0)) != UNSPEC - || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL - && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL) - || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)) - return ix86_delegitimize_tls_address (orig_x); - x = XVECEXP (XEXP (x, 0), 0, 0); - if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) + + if (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == UNSPEC + && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL + || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) + && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) { - x = simplify_gen_subreg (GET_MODE (orig_x), x, - GET_MODE (x), 0); - if (x == NULL_RTX) - return orig_x; + x = XVECEXP (XEXP (x, 0), 0, 0); + if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) + { + x = simplify_gen_subreg (GET_MODE (orig_x), x, + GET_MODE (x), 0); + if (x == NULL_RTX) + return orig_x; + } + return x; } - return x; + + if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) + return ix86_delegitimize_tls_address (orig_x); + + /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic + and -mcmodel=medium -fpic. */ } if (GET_CODE (x) != PLUS @@ -13858,10 +14022,12 @@ ix86_delegitimize_address (rtx x) if (GET_CODE (x) == UNSPEC && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) - || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) + || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) + || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC + && !MEM_P (orig_x) && !addend))) result = XVECEXP (x, 0, 0); - if (TARGET_MACHO && darwin_local_data_pic (x) + if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) && !MEM_P (orig_x)) result = XVECEXP (x, 0, 0); @@ -14077,6 +14243,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, If CODE is 'q', pretend the mode is DImode. If CODE is 'x', pretend the mode is V4SFmode. If CODE is 't', pretend the mode is V8SFmode. + If CODE is 'g', pretend the mode is V16SFmode. If CODE is 'h', pretend the reg is the 'high' byte register. If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. If CODE is 'd', duplicate the operand for AVX instruction. 
@@ -14122,6 +14289,8 @@ print_reg (rtx x, int code, FILE *file) code = 16; else if (code == 't') code = 32; + else if (code == 'g') + code = 64; else code = GET_MODE_SIZE (GET_MODE (x)); @@ -14195,6 +14364,14 @@ print_reg (rtx x, int code, FILE *file) fputs (hi_reg_name[regno] + 1, file); return; } + case 64: + if (SSE_REG_P (x)) + { + gcc_assert (!duplicated); + putc ('z', file); + fputs (hi_reg_name[REGNO (x)] + 1, file); + return; + } break; default: gcc_unreachable (); @@ -14268,6 +14445,7 @@ get_some_local_dynamic_name (void) q -- likewise, print the DImode name of the register. x -- likewise, print the V4SFmode name of the register. t -- likewise, print the V8SFmode name of the register. + g -- likewise, print the V16SFmode name of the register. h -- print the QImode name for a "high" register, either ah, bh, ch or dh. y -- print "st(0)" instead of "st" as a register. d -- print duplicated register operand for AVX instruction. @@ -14497,6 +14675,7 @@ ix86_print_operand (FILE *file, rtx x, int code) case 'q': case 'h': case 't': + case 'g': case 'y': case 'x': case 'X': @@ -14805,6 +14984,7 @@ ix86_print_operand (FILE *file, rtx x, int code) size = "XMMWORD"; break; case 32: size = "YMMWORD"; break; + case 64: size = "ZMMWORD"; break; default: gcc_unreachable (); } @@ -17882,7 +18062,7 @@ ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) bool ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, enum machine_mode mode ATTRIBUTE_UNUSED, - rtx operands[2] ATTRIBUTE_UNUSED) + rtx operands[2]) { /* If one of operands is memory, source and destination must match. */ if ((MEM_P (operands[0]) @@ -22903,6 +23083,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop_1_byte: need_zero_guard = true; @@ -23093,6 +23274,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop_1_byte: case loop: @@ -23304,6 +23486,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop: need_zero_guard = true; @@ -23481,6 +23664,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, { case libcall: case no_stringop: + case last_alg: gcc_unreachable (); case loop_1_byte: case loop: @@ -29546,8 +29730,8 @@ ix86_function_versions (tree fn1, tree fn2) error_at (DECL_SOURCE_LOCATION (fn2), "missing %<target%> attribute for multi-versioned %D", fn2); - error_at (DECL_SOURCE_LOCATION (fn1), - "previous declaration of %D", fn1); + inform (DECL_SOURCE_LOCATION (fn1), + "previous declaration of %D", fn1); /* Prevent diagnosing of the same error multiple times. */ DECL_ATTRIBUTES (fn2) = tree_cons (get_identifier ("target"), @@ -29830,7 +30014,7 @@ make_resolver_func (const tree default_decl, DECL_IGNORED_P (decl) = 0; /* IFUNC resolvers have to be externally visible. */ TREE_PUBLIC (decl) = 1; - DECL_UNINLINABLE (decl) = 0; + DECL_UNINLINABLE (decl) = 1; /* Resolver is not external, body is generated. */ DECL_EXTERNAL (decl) = 0; @@ -31972,9 +32156,8 @@ ix86_expand_vec_set_builtin (tree exp) IGNORE is nonzero if the value is to be ignored. 
*/ static rtx -ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) +ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + enum machine_mode mode, int ignore) { const struct builtin_description *d; size_t i; @@ -33798,7 +33981,7 @@ ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) alternative: if reload cannot do this, it will still use its choice. */ mode = GET_MODE (x); if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) - return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS; + return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; if (X87_FLOAT_MODE_P (mode)) { @@ -33822,7 +34005,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, if (TARGET_64BIT && MEM_P (x) && GET_MODE_SIZE (mode) > UNITS_PER_WORD - && rclass == GENERAL_REGS + && INTEGER_CLASS_P (rclass) && !offsettable_memref_p (x)) { sri->icode = (in_p @@ -33838,12 +34021,8 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, intermediate register on 32bit targets. */ if (!TARGET_64BIT && !in_p && mode == QImode - && (rclass == GENERAL_REGS - || rclass == LEGACY_REGS - || rclass == NON_Q_REGS - || rclass == SIREG - || rclass == DIREG - || rclass == INDEX_REGS)) + && INTEGER_CLASS_P (rclass) + && MAYBE_NON_Q_CLASS_P (rclass)) { int regno; @@ -34269,10 +34448,25 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) { /* We implement the move patterns for all vector modes into and out of SSE registers, even when no operation instructions - are available. OImode move is available only when AVX is - enabled. */ + are available. */ + + /* For AVX-512 we allow, regardless of regno: + - XI mode + - any of 512-bit wide vector mode + - any scalar mode. */ + if (TARGET_AVX512F + && (mode == XImode + || VALID_AVX512F_REG_MODE (mode) + || VALID_AVX512F_SCALAR_MODE (mode))) + return true; + + /* xmm16-xmm31 are only available for AVX-512. */ + if (EXT_REX_SSE_REGNO_P (regno)) + return false; + + /* OImode move is available only when AVX is enabled. */ return ((TARGET_AVX && mode == OImode) - || VALID_AVX256_REG_MODE (mode) + || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || VALID_SSE_REG_MODE (mode) || VALID_SSE2_REG_MODE (mode) || VALID_MMX_REG_MODE (mode) @@ -34422,7 +34616,8 @@ ix86_set_reg_reg_cost (enum machine_mode mode) case MODE_VECTOR_INT: case MODE_VECTOR_FLOAT: - if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode)) + if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) + || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) @@ -35054,6 +35249,10 @@ x86_order_regs_for_local_alloc (void) for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) reg_alloc_order [pos++] = i; + /* Extended REX SSE registers. */ + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + reg_alloc_order [pos++] = i; + /* x87 registers. */ if (TARGET_SSE_MATH) for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) @@ -36051,9 +36250,9 @@ x86_emit_floatuns (rtx operands[2]) emit_label (donelab); } -/* AVX2 does support 32-byte integer vector operations, - thus the longest vector we are faced with is V32QImode. */ -#define MAX_VECT_LEN 32 +/* AVX512F does support 64-byte integer vector operations, + thus the longest vector we are faced with is V64QImode. 
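(Aside: several hunks above key off the new 512-bit modes, for instance the VALID_AVX512F_REG_MODE test used in ix86_hard_regno_mode_ok; the macro itself is defined in the i386.h hunk further down, and the MAX_VECT_LEN bump to 64 follows immediately. The throwaway check below just confirms that all of those modes describe the same 64-byte, 512-bit zmm register width; the element counts and sizes are ordinary C arithmetic, not values read from GCC headers.)

#include <stdio.h>

int
main (void)
{
  /* The 512-bit vector modes accepted by VALID_AVX512F_REG_MODE, as
     element-count x element-size pairs.  */
  struct { const char *mode; int nelems, elem_bytes; } modes[] = {
    { "V64QI", 64, 1 }, { "V32HI", 32, 2 }, { "V16SI", 16, 4 },
    { "V16SF", 16, 4 }, { "V8DI",  8, 8 },  { "V8DF",  8, 8 },
  };
  unsigned int i;

  for (i = 0; i < sizeof modes / sizeof modes[0]; i++)
    printf ("%-6s = %3d bytes = %3d bits\n", modes[i].mode,
            modes[i].nelems * modes[i].elem_bytes,
            modes[i].nelems * modes[i].elem_bytes * 8);
  /* Every row is 64 bytes / 512 bits: one zmm register, hence
     MAX_VECT_LEN 64 and the new 512-bit arm of BIGGEST_ALIGNMENT.  */
  return 0;
}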
*/ +#define MAX_VECT_LEN 64 struct expand_vec_perm_d { @@ -42590,7 +42789,7 @@ ix86_spill_class (reg_class_t rclass, enum machine_mode mode) if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX && (mode == SImode || (TARGET_64BIT && mode == DImode)) && INTEGER_CLASS_P (rclass)) - return SSE_REGS; + return ALL_SSE_REGS; return NO_REGS; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7d940f98804..e820aa65ac5 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -51,6 +51,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_SSE4_2 TARGET_ISA_SSE4_2 #define TARGET_AVX TARGET_ISA_AVX #define TARGET_AVX2 TARGET_ISA_AVX2 +#define TARGET_AVX512F TARGET_ISA_AVX512F +#define TARGET_AVX512PF TARGET_ISA_AVX512PF +#define TARGET_AVX512ER TARGET_ISA_AVX512ER +#define TARGET_AVX512CD TARGET_ISA_AVX512CD #define TARGET_FMA TARGET_ISA_FMA #define TARGET_SSE4A TARGET_ISA_SSE4A #define TARGET_FMA4 TARGET_ISA_FMA4 @@ -170,7 +174,7 @@ struct processor_costs { const int fsqrt; /* cost of FSQRT instruction. */ /* Specify what algorithm to use for stringops on unknown size. */ - struct stringop_algs memcpy[2], memset[2]; + struct stringop_algs *memcpy, *memset; const int scalar_stmt_cost; /* Cost of any scalar operation, excluding load and store. */ const int scalar_load_cost; /* Cost of scalar load. */ @@ -261,81 +265,11 @@ extern const struct processor_costs ix86_size_cost; /* Feature tests against the various tunings. */ enum ix86_tune_indices { - X86_TUNE_USE_LEAVE, - X86_TUNE_PUSH_MEMORY, - X86_TUNE_ZERO_EXTEND_WITH_AND, - X86_TUNE_UNROLL_STRLEN, - X86_TUNE_BRANCH_PREDICTION_HINTS, - X86_TUNE_DOUBLE_WITH_ADD, - X86_TUNE_USE_SAHF, - X86_TUNE_MOVX, - X86_TUNE_PARTIAL_REG_STALL, - X86_TUNE_PARTIAL_FLAG_REG_STALL, - X86_TUNE_LCP_STALL, - X86_TUNE_USE_HIMODE_FIOP, - X86_TUNE_USE_SIMODE_FIOP, - X86_TUNE_USE_MOV0, - X86_TUNE_USE_CLTD, - X86_TUNE_USE_XCHGB, - X86_TUNE_SPLIT_LONG_MOVES, - X86_TUNE_READ_MODIFY_WRITE, - X86_TUNE_READ_MODIFY, - X86_TUNE_PROMOTE_QIMODE, - X86_TUNE_FAST_PREFIX, - X86_TUNE_SINGLE_STRINGOP, - X86_TUNE_QIMODE_MATH, - X86_TUNE_HIMODE_MATH, - X86_TUNE_PROMOTE_QI_REGS, - X86_TUNE_PROMOTE_HI_REGS, - X86_TUNE_SINGLE_POP, - X86_TUNE_DOUBLE_POP, - X86_TUNE_SINGLE_PUSH, - X86_TUNE_DOUBLE_PUSH, - X86_TUNE_INTEGER_DFMODE_MOVES, - X86_TUNE_PARTIAL_REG_DEPENDENCY, - X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, - X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, - X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, - X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, - X86_TUNE_SSE_SPLIT_REGS, - X86_TUNE_SSE_TYPELESS_STORES, - X86_TUNE_SSE_LOAD0_BY_PXOR, - X86_TUNE_MEMORY_MISMATCH_STALL, - X86_TUNE_PROLOGUE_USING_MOVE, - X86_TUNE_EPILOGUE_USING_MOVE, - X86_TUNE_SHIFT1, - X86_TUNE_USE_FFREEP, - X86_TUNE_INTER_UNIT_MOVES_TO_VEC, - X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, - X86_TUNE_INTER_UNIT_CONVERSIONS, - X86_TUNE_FOUR_JUMP_LIMIT, - X86_TUNE_SCHEDULE, - X86_TUNE_USE_BT, - X86_TUNE_USE_INCDEC, - X86_TUNE_PAD_RETURNS, - X86_TUNE_PAD_SHORT_FUNCTION, - X86_TUNE_EXT_80387_CONSTANTS, - X86_TUNE_AVOID_VECTOR_DECODE, - X86_TUNE_PROMOTE_HIMODE_IMUL, - X86_TUNE_SLOW_IMUL_IMM32_MEM, - X86_TUNE_SLOW_IMUL_IMM8, - X86_TUNE_MOVE_M1_VIA_OR, - X86_TUNE_NOT_UNPAIRABLE, - X86_TUNE_NOT_VECTORMODE, - X86_TUNE_USE_VECTOR_FP_CONVERTS, - X86_TUNE_USE_VECTOR_CONVERTS, - X86_TUNE_FUSE_CMP_AND_BRANCH, - X86_TUNE_OPT_AGU, - X86_TUNE_VECTORIZE_DOUBLE, - X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, - X86_TUNE_AVX128_OPTIMAL, - X86_TUNE_REASSOC_INT_TO_PARALLEL, - X86_TUNE_REASSOC_FP_TO_PARALLEL, - 
X86_TUNE_GENERAL_REGS_SSE_SPILL, - X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, - X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, - - X86_TUNE_LAST +#undef DEF_TUNE +#define DEF_TUNE(tune, name, selector) tune, +#include "x86-tune.def" +#undef DEF_TUNE +X86_TUNE_LAST }; extern unsigned char ix86_tune_features[X86_TUNE_LAST]; @@ -802,7 +736,8 @@ enum target_cpu_default Pentium+ prefers DFmode values to be aligned to 64 bit boundary and Pentium Pro XFmode values at 128 bit boundaries. */ -#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256 : 128) +#define BIGGEST_ALIGNMENT \ + (TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128)) /* Maximum stack alignment. */ #define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT @@ -958,7 +893,7 @@ enum target_cpu_default eliminated during reloading in favor of either the stack or frame pointer. */ -#define FIRST_PSEUDO_REGISTER 53 +#define FIRST_PSEUDO_REGISTER 69 /* Number of hardware registers that go into the DWARF-2 unwind info. If not defined, equals FIRST_PSEUDO_REGISTER. */ @@ -984,6 +919,10 @@ enum target_cpu_default /* r8, r9, r10, r11, r12, r13, r14, r15*/ \ 0, 0, 0, 0, 0, 0, 0, 0, \ /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \ 0, 0, 0, 0, 0, 0, 0, 0 } /* 1 for registers not available across function calls. @@ -1012,7 +951,11 @@ enum target_cpu_default /* r8, r9, r10, r11, r12, r13, r14, r15*/ \ 1, 1, 1, 1, 2, 2, 2, 2, \ /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \ - 6, 6, 6, 6, 6, 6, 6, 6 } + 6, 6, 6, 6, 6, 6, 6, 6, \ +/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \ + 6, 6, 6, 6, 6, 6, 6, 6, \ +/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \ + 6, 6, 6, 6, 6, 6, 6, 6 } /* Order in which to allocate registers. Each register must be listed once, even those in FIXED_REGISTERS. List frame pointer @@ -1027,7 +970,8 @@ enum target_cpu_default { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ - 48, 49, 50, 51, 52 } + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \ + 63, 64, 65, 66, 67, 68 } /* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order to be rearranged based on a particular function. When using sse math, @@ -1073,6 +1017,14 @@ enum target_cpu_default #define VALID_AVX256_REG_OR_OI_MODE(MODE) \ (VALID_AVX256_REG_MODE (MODE) || (MODE) == OImode) +#define VALID_AVX512F_SCALAR_MODE(MODE) \ + ((MODE) == DImode || (MODE) == DFmode || (MODE) == SImode \ + || (MODE) == SFmode) + +#define VALID_AVX512F_REG_MODE(MODE) \ + ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \ + || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode) + #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ || (MODE) == V2DImode || (MODE) == DFmode) @@ -1112,7 +1064,9 @@ enum target_cpu_default || (MODE) == V2DImode || (MODE) == V4SFmode || (MODE) == V4SImode \ || (MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \ - || (MODE) == V2TImode) + || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \ + || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \ + || (MODE) == V16SFmode) /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. 
*/ @@ -1175,15 +1129,18 @@ enum target_cpu_default #define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1) #define LAST_SSE_REG (FIRST_SSE_REG + 7) -#define FIRST_MMX_REG (LAST_SSE_REG + 1) +#define FIRST_MMX_REG (LAST_SSE_REG + 1) /*29*/ #define LAST_MMX_REG (FIRST_MMX_REG + 7) -#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) +#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) /*37*/ #define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7) -#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) +#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) /*45*/ #define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7) +#define FIRST_EXT_REX_SSE_REG (LAST_REX_SSE_REG + 1) /*53*/ +#define LAST_EXT_REX_SSE_REG (FIRST_EXT_REX_SSE_REG + 15) /*68*/ + /* Override this in other tm.h files to cope with various OS lossage requiring a frame pointer. */ #ifndef SUBTARGET_FRAME_POINTER_REQUIRED @@ -1263,6 +1220,8 @@ enum reg_class FLOAT_REGS, SSE_FIRST_REG, SSE_REGS, + EVEX_SSE_REGS, + ALL_SSE_REGS, MMX_REGS, FP_TOP_SSE_REGS, FP_SECOND_SSE_REGS, @@ -1280,7 +1239,7 @@ enum reg_class #define FLOAT_CLASS_P(CLASS) \ reg_class_subset_p ((CLASS), FLOAT_REGS) #define SSE_CLASS_P(CLASS) \ - reg_class_subset_p ((CLASS), SSE_REGS) + reg_class_subset_p ((CLASS), ALL_SSE_REGS) #define MMX_CLASS_P(CLASS) \ ((CLASS) == MMX_REGS) #define MAYBE_INTEGER_CLASS_P(CLASS) \ @@ -1288,13 +1247,16 @@ enum reg_class #define MAYBE_FLOAT_CLASS_P(CLASS) \ reg_classes_intersect_p ((CLASS), FLOAT_REGS) #define MAYBE_SSE_CLASS_P(CLASS) \ - reg_classes_intersect_p (SSE_REGS, (CLASS)) + reg_classes_intersect_p ((CLASS), ALL_SSE_REGS) #define MAYBE_MMX_CLASS_P(CLASS) \ - reg_classes_intersect_p (MMX_REGS, (CLASS)) + reg_classes_intersect_p ((CLASS), MMX_REGS) #define Q_CLASS_P(CLASS) \ reg_class_subset_p ((CLASS), Q_REGS) +#define MAYBE_NON_Q_CLASS_P(CLASS) \ + reg_classes_intersect_p ((CLASS), NON_Q_REGS) + /* Give names of register classes as strings for dump file. */ #define REG_CLASS_NAMES \ @@ -1311,6 +1273,8 @@ enum reg_class "FLOAT_REGS", \ "SSE_FIRST_REG", \ "SSE_REGS", \ + "EVEX_SSE_REGS", \ + "ALL_SSE_REGS", \ "MMX_REGS", \ "FP_TOP_SSE_REGS", \ "FP_SECOND_SSE_REGS", \ @@ -1326,30 +1290,36 @@ enum reg_class Note that CLOBBERED_REGS are calculated by TARGET_CONDITIONAL_REGISTER_USAGE. 
*/ -#define REG_CLASS_CONTENTS \ -{ { 0x00, 0x0 }, \ - { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \ - { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \ - { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \ - { 0x03, 0x0 }, /* AD_REGS */ \ - { 0x0f, 0x0 }, /* Q_REGS */ \ - { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \ - { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \ - { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \ - { 0x00, 0x0 }, /* CLOBBERED_REGS */ \ - { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \ - { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\ - { 0xff00, 0x0 }, /* FLOAT_REGS */ \ - { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \ -{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \ -{ 0xe0000000, 0x1f }, /* MMX_REGS */ \ -{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \ -{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REG */ \ -{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \ - { 0x11ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \ -{ 0x1ff100ff,0x1fffe0 }, /* INT_SSE_REGS */ \ -{ 0x1ff1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \ -{ 0xffffffff,0x1fffff } \ +#define REG_CLASS_CONTENTS \ +{ { 0x00, 0x0, 0x0 }, \ + { 0x01, 0x0, 0x0 }, /* AREG */ \ + { 0x02, 0x0, 0x0 }, /* DREG */ \ + { 0x04, 0x0, 0x0 }, /* CREG */ \ + { 0x08, 0x0, 0x0 }, /* BREG */ \ + { 0x10, 0x0, 0x0 }, /* SIREG */ \ + { 0x20, 0x0, 0x0 }, /* DIREG */ \ + { 0x03, 0x0, 0x0 }, /* AD_REGS */ \ + { 0x0f, 0x0, 0x0 }, /* Q_REGS */ \ + { 0x1100f0, 0x1fe0, 0x0 }, /* NON_Q_REGS */ \ + { 0x7f, 0x1fe0, 0x0 }, /* INDEX_REGS */ \ + { 0x1100ff, 0x0, 0x0 }, /* LEGACY_REGS */ \ + { 0x07, 0x0, 0x0 }, /* CLOBBERED_REGS */ \ + { 0x1100ff, 0x1fe0, 0x0 }, /* GENERAL_REGS */ \ + { 0x100, 0x0, 0x0 }, /* FP_TOP_REG */ \ + { 0x0200, 0x0, 0x0 }, /* FP_SECOND_REG */ \ + { 0xff00, 0x0, 0x0 }, /* FLOAT_REGS */ \ + { 0x200000, 0x0, 0x0 }, /* SSE_FIRST_REG */ \ +{ 0x1fe00000, 0x1fe000, 0x0 }, /* SSE_REGS */ \ + { 0x0,0xffe00000, 0x1f }, /* EVEX_SSE_REGS */ \ +{ 0x1fe00000,0xffffe000, 0x1f }, /* ALL_SSE_REGS */ \ +{ 0xe0000000, 0x1f, 0x0 }, /* MMX_REGS */ \ +{ 0x1fe00100,0xffffe000, 0x1f }, /* FP_TOP_SSE_REG */ \ +{ 0x1fe00200,0xffffe000, 0x1f }, /* FP_SECOND_SSE_REG */ \ +{ 0x1fe0ff00,0xffffe000, 0x1f }, /* FLOAT_SSE_REGS */ \ +{ 0x11ffff, 0x1fe0, 0x0 }, /* FLOAT_INT_REGS */ \ +{ 0x1ff100ff,0xffffffe0, 0x1f }, /* INT_SSE_REGS */ \ +{ 0x1ff1ffff,0xffffffe0, 0x1f }, /* FLOAT_INT_SSE_REGS */ \ +{ 0xffffffff,0xffffffff, 0x1f } \ } /* The same information, inverted: @@ -1393,13 +1363,20 @@ enum reg_class #define SSE_REG_P(X) (REG_P (X) && SSE_REGNO_P (REGNO (X))) #define SSE_REGNO_P(N) \ (IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \ - || REX_SSE_REGNO_P (N)) + || REX_SSE_REGNO_P (N) \ + || EXT_REX_SSE_REGNO_P (N)) #define REX_SSE_REGNO_P(N) \ IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG) +#define EXT_REX_SSE_REGNO_P(N) \ + IN_RANGE ((N), FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG) + #define SSE_REGNO(N) \ - ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8) + ((N) < 8 ? FIRST_SSE_REG + (N) \ + : (N) <= LAST_REX_SSE_REG ? 
(FIRST_REX_SSE_REG + (N) - 8) \ + : (FIRST_EXT_REX_SSE_REG + (N) - 16)) + #define SSE_FLOAT_MODE_P(MODE) \ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) @@ -1952,7 +1929,11 @@ do { \ "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", \ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"} + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", \ + "xmm16", "xmm17", "xmm18", "xmm19", \ + "xmm20", "xmm21", "xmm22", "xmm23", \ + "xmm24", "xmm25", "xmm26", "xmm27", \ + "xmm28", "xmm29", "xmm30", "xmm31" } #define REGISTER_NAMES HI_REGISTER_NAMES @@ -2273,9 +2254,13 @@ enum avx_u128_state scheduling just increases amount of live registers at time and in the turn amount of fxch instructions needed. - ??? Maybe Pentium chips benefits from renaming, someone can try.... */ + ??? Maybe Pentium chips benefits from renaming, someone can try.... + + Don't rename evex to non-evex sse registers. */ -#define HARD_REGNO_RENAME_OK(SRC, TARGET) !STACK_REGNO_P (SRC) +#define HARD_REGNO_RENAME_OK(SRC, TARGET) (!STACK_REGNO_P (SRC) && \ + (EXT_REX_SSE_REGNO_P (SRC) == \ + EXT_REX_SSE_REGNO_P (TARGET))) #define FASTCALL_PREFIX '@' diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c67ed31923e..3307b081aaa 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -312,6 +312,22 @@ (XMM13_REG 50) (XMM14_REG 51) (XMM15_REG 52) + (XMM16_REG 53) + (XMM17_REG 54) + (XMM18_REG 55) + (XMM19_REG 56) + (XMM20_REG 57) + (XMM21_REG 58) + (XMM22_REG 59) + (XMM23_REG 60) + (XMM24_REG 61) + (XMM25_REG 62) + (XMM26_REG 63) + (XMM27_REG 64) + (XMM28_REG 65) + (XMM29_REG 66) + (XMM30_REG 67) + (XMM31_REG 68) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -350,7 +366,8 @@ ;; Main data type used by the insn (define_attr "mode" - "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF" + "unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF, + V2DF,V2SF,V1DF,V8DF" (const_string "unknown")) ;; The CPU unit operations uses. @@ -471,10 +488,13 @@ (const_int 0))) ;; Prefix used: original, VEX or maybe VEX. -(define_attr "prefix" "orig,vex,maybe_vex" - (if_then_else (eq_attr "mode" "OI,V8SF,V4DF") - (const_string "vex") - (const_string "orig"))) +(define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex" + (cond [(eq_attr "mode" "OI,V8SF,V4DF") + (const_string "vex") + (eq_attr "mode" "XI,V16SF,V8DF") + (const_string "evex") + ] + (const_string "orig"))) ;; VEX W bit is used. (define_attr "prefix_vex_w" "" (const_int 0)) @@ -493,6 +513,9 @@ (symbol_ref "ix86_attr_length_vex_default (insn, false, true)") (symbol_ref "ix86_attr_length_vex_default (insn, false, false)")))) +;; 4-bytes evex prefix and 1 byte opcode. +(define_attr "length_evex" "" (const_int 5)) + ;; Set when modrm byte is used. 
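(Aside, stepping back to the i386.h hunks above for a moment: REG_CLASS_CONTENTS is now three 32-bit words per class, covering hard registers 0-31, 32-63 and 64-68. The throwaway decoder below uses the EVEX_SSE_REGS row copied from that hunk and should report 16 registers, 53 through 68, matching FIRST_EXT_REX_SSE_REG through LAST_EXT_REX_SSE_REG, i.e. xmm16-xmm31; it is only a consistency check, not GCC code.)

#include <stdio.h>

int
main (void)
{
  /* EVEX_SSE_REGS row of REG_CLASS_CONTENTS: three 32-bit words.  */
  unsigned int words[3] = { 0x0, 0xffe00000, 0x1f };
  int regno, first = -1, last = -1, count = 0;

  for (regno = 0; regno < 69; regno++)          /* FIRST_PSEUDO_REGISTER */
    if (words[regno / 32] & (1u << (regno % 32)))
      {
        if (first < 0)
          first = regno;
        last = regno;
        count++;
      }

  /* Expect 16 registers, 53..68 (FIRST_EXT_REX_SSE_REG..LAST_EXT_REX_SSE_REG,
     i.e. xmm16-xmm31).  */
  printf ("EVEX_SSE_REGS: %d regs, %d..%d\n", count, first, last);
  return 0;
}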
(define_attr "modrm" "" (cond [(eq_attr "type" "str,leave") @@ -544,8 +567,17 @@ (plus (const_int 2) (plus (attr "prefix_data16") (attr "length_address"))) + (ior (eq_attr "prefix" "evex") + (and (ior (eq_attr "prefix" "maybe_evex") + (eq_attr "prefix" "maybe_vex")) + (match_test "TARGET_AVX512F"))) + (plus (attr "length_evex") + (plus (attr "length_immediate") + (plus (attr "modrm") + (attr "length_address")))) (ior (eq_attr "prefix" "vex") - (and (eq_attr "prefix" "maybe_vex") + (and (ior (eq_attr "prefix" "maybe_vex") + (eq_attr "prefix" "maybe_evex")) (match_test "TARGET_AVX"))) (plus (attr "length_vex") (plus (attr "length_immediate") @@ -663,7 +695,7 @@ ;; Used to control the "enabled" attribute on a per-instruction basis. (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, - avx2,noavx2,bmi2,fma4,fma" + avx2,noavx2,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f" (const_string "base")) (define_attr "enabled" "" @@ -689,6 +721,10 @@ (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4") (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA") + (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F") + (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") + (eq_attr "isa" "fma_avx512f") + (symbol_ref "TARGET_FMA || TARGET_AVX512F") ] (const_int 1))) @@ -924,10 +960,12 @@ ;; SSE instruction suffix for various modes (define_mode_attr ssemodesuffix [(SF "ss") (DF "sd") + (V16SF "ps") (V8DF "pd") (V8SF "ps") (V4DF "pd") (V4SF "ps") (V2DF "pd") (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q") - (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")]) + (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q") + (V64QI "b") (V16SI "d") (V8DI "q")]) ;; SSE vector suffix for floating point modes (define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")]) @@ -1649,6 +1687,12 @@ ;; Move instructions. +(define_expand "movxi" + [(set (match_operand:XI 0 "nonimmediate_operand") + (match_operand:XI 1 "general_operand"))] + "TARGET_AVX512F" + "ix86_expand_move (XImode, operands); DONE;") + ;; Reload patterns to support multi-word load/store ;; with non-offsetable address. 
(define_expand "reload_noff_store" @@ -1746,6 +1790,30 @@ (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) +(define_insn "*movxi_internal_avx512f" + [(set (match_operand:XI 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:XI 1 "vector_move_operand" "C ,xm,x"))] + "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return standard_sse_constant_opcode (insn, operands[1]); + case 1: + case 2: + if (misaligned_operand (operands[0], XImode) + || misaligned_operand (operands[1], XImode)) + return "vmovdqu32\t{%1, %0|%0, %1}"; + else + return "vmovdqa32\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + (define_insn "*movoi_internal_avx" [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x ,m") (match_operand:OI 1 "vector_move_operand" "C ,xm,x"))] @@ -1857,9 +1925,9 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?r,?*Yi,?*Ym,?*Yi") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*x,m ,*x,*Yj,*x,r ,*Yj ,*Yn"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -1896,6 +1964,8 @@ return "%vmovq\t{%1, %0|%0, %1}"; case MODE_TI: return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; case MODE_V2SF: gcc_assert (!TARGET_AVX); @@ -1989,7 +2059,10 @@ (cond [(eq_attr "alternative" "2") (const_string "SI") (eq_attr "alternative" "12,13") - (cond [(ior (not (match_test "TARGET_SSE2")) + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_AVX") @@ -2018,9 +2091,9 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?r,?*Yi") + "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,*x,r"))] + "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2038,6 +2111,8 @@ return "%vmovd\t{%1, %0|%0, %1}"; case MODE_TI: return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa32\t{%g1, %g0|%g0, %g1}"; case MODE_V4SF: return "%vmovaps\t{%1, %0|%0, %1}"; @@ -2116,7 +2191,10 @@ (cond [(eq_attr "alternative" "2,3") (const_string "DI") (eq_attr "alternative" "6,7") - (cond [(ior (not (match_test "TARGET_SSE2")) + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_AVX") @@ -2255,7 +2333,7 @@ "TARGET_LP64 && ix86_check_movabs (insn, 0)" "@ movabs{<imodesuffix>}\t{%1, %P0|[%P0], %1} - mov{<imodesuffix>}\t{%1, %a0|%a0, %1}" + mov{<imodesuffix>}\t{%1, %a0|<iptrsize> PTR %a0, %1}" [(set_attr "type" "imov") (set_attr "modrm" "0,*") (set_attr "length_address" "8,0") @@ -2269,7 +2347,7 @@ "TARGET_LP64 && ix86_check_movabs (insn, 1)" "@ movabs{<imodesuffix>}\t{%P1, %0|%0, 
[%P1]} - mov{<imodesuffix>}\t{%a1, %0|%0, %a1}" + mov{<imodesuffix>}\t{%a1, %0|%0, <iptrsize> PTR %a1}" [(set_attr "type" "imov") (set_attr "modrm" "0,*") (set_attr "length_address" "8,0") @@ -2703,9 +2781,9 @@ ;; Possible store forwarding (partial memory) stall in alternative 4. (define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" - "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,r ,Yi") + "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,Yi") (match_operand:DF 1 "general_operand" - "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yj,r"))] + "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2750,6 +2828,8 @@ case MODE_V4SF: return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V8DF: + return "vmovapd\t{%g1, %g0|%g0, %g1}"; case MODE_V2DF: return "%vmovapd\t{%1, %0|%0, %1}"; @@ -2824,6 +2904,8 @@ (eq_attr "alternative" "9,13") (cond [(not (match_test "TARGET_SSE2")) (const_string "V4SF") + (match_test "TARGET_AVX512F") + (const_string "XI") (match_test "TARGET_AVX") (const_string "V2DF") (match_test "optimize_function_for_size_p (cfun)") @@ -2839,7 +2921,10 @@ /* movaps is one byte shorter for non-AVX targets. */ (eq_attr "alternative" "10,14") - (cond [(ior (not (match_test "TARGET_SSE2")) + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "V8DF") + (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") @@ -2872,9 +2957,9 @@ (define_insn "*movsf_internal" [(set (match_operand:SF 0 "nonimmediate_operand" - "=Yf*f,m ,Yf*f,?r ,?m,x,x,x,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym") + "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym") (match_operand:SF 1 "general_operand" - "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,Yj,r ,*y ,m ,*y,*Yn,r"))] + "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2907,6 +2992,8 @@ return "vmovss\t{%1, %0, %0|%0, %0, %1}"; return "%vmovss\t{%1, %0|%0, %1}"; + case MODE_V16SF: + return "vmovaps\t{%g1, %g0|%g0, %g1}"; case MODE_V4SF: return "%vmovaps\t{%1, %0|%0, %1}"; @@ -2960,6 +3047,8 @@ (eq_attr "alternative" "5") (cond [(not (match_test "TARGET_SSE2")) (const_string "V4SF") + (match_test "TARGET_AVX512F") + (const_string "V16SF") (match_test "TARGET_AVX") (const_string "V4SF") (match_test "optimize_function_for_size_p (cfun)") @@ -2979,10 +3068,15 @@ of instructions to load just part of the register. It is better to maintain the whole registers in single format to avoid problems on using packed logical operations. 
*/ - (and (eq_attr "alternative" "6") - (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") - (match_test "TARGET_SSE_SPLIT_REGS"))) - (const_string "V4SF") + (eq_attr "alternative" "6") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "V16SF") + (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (match_test "TARGET_SSE_SPLIT_REGS")) + (const_string "V4SF") + ] + (const_string "SF")) ] (const_string "SF")))]) @@ -4596,10 +4690,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387 && TARGET_INTER_UNIT_CONVERSIONS - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_split @@ -4608,10 +4699,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387 && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -4697,10 +4785,7 @@ (clobber (match_operand:SI 2 "memory_operand"))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { rtx op1 = operands[1]; @@ -4740,10 +4825,7 @@ (clobber (match_operand:SI 2 "memory_operand"))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0], @@ -4764,10 +4846,7 @@ (float:MODEF (match_operand:SI 1 "register_operand")))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { rtx op1 = operands[1]; @@ -4810,10 +4889,7 @@ (float:MODEF (match_operand:SI 1 "memory_operand")))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0], @@ -4872,10 +4948,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn 
"*float<SWI48:mode><MODEF:mode>2_sse_nointerunit" @@ -4905,10 +4978,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -4917,10 +4987,7 @@ (float:MODEF (match_operand:SWI48 1 "memory_operand"))) (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float<SWI48x:mode><X87MODEF:mode>2_i387_with_temp" @@ -4968,6 +5035,46 @@ && reload_completed" [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) +;; Avoid partial SSE register dependency stalls + +(define_split + [(set (match_operand:MODEF 0 "register_operand") + (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_SSE_PARTIAL_REG_DEPENDENCY + && optimize_function_for_speed_p (cfun) + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (vec_merge:<ssevecmode> + (vec_duplicate:<ssevecmode> + (float:MODEF (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0], + <MODE>mode, 0); + emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode)); +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand") + (float:MODEF (match_operand:DI 1 "nonimmediate_operand")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_SSE_PARTIAL_REG_DEPENDENCY + && optimize_function_for_speed_p (cfun) + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (vec_merge:<ssevecmode> + (vec_duplicate:<ssevecmode> + (float:MODEF (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0], + <MODE>mode, 0); + emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode)); +}) + ;; Avoid store forwarding (partial memory) stall penalty ;; by passing DImode value through XMM registers. */ @@ -5024,6 +5131,18 @@ && reload_completed" [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) +(define_expand "floatuns<SWI12:mode><MODEF:mode>2" + [(set (match_operand:MODEF 0 "register_operand") + (unsigned_float:MODEF + (match_operand:SWI12 1 "nonimmediate_operand")))] + "!TARGET_64BIT + && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH" +{ + operands[1] = convert_to_mode (SImode, operands[1], 1); + emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1])); + DONE; +}) + ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two ;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC @@ -12278,11 +12397,33 @@ (set (attr "length") (symbol_ref "TARGET_X32 ? 
15 : 16"))]) +(define_insn "*tls_global_dynamic_64_largepic" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI + (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b") + (match_operand:DI 3 "immediate_operand" "i"))) + (match_operand 4))) + (unspec:DI [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLS_GD)] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[3]) == CONST + && GET_CODE (XEXP (operands[3], 0)) == UNSPEC + && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF" +{ + output_asm_insn + ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands); + output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); + return "call\t{*%%rax|rax}"; +} + [(set_attr "type" "multi") + (set_attr "length" "22")]) + (define_expand "tls_global_dynamic_64_<mode>" [(parallel [(set (match_operand:P 0 "register_operand") (call:P - (mem:QI (match_operand 2 "constant_call_address_operand")) + (mem:QI (match_operand 2)) (const_int 0))) (unspec:P [(match_operand 1 "tls_symbolic_operand")] UNSPEC_TLS_GD)])] @@ -12340,11 +12481,32 @@ [(set_attr "type" "multi") (set_attr "length" "12")]) +(define_insn "*tls_local_dynamic_base_64_largepic" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI + (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "immediate_operand" "i"))) + (match_operand 3))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[2]) == CONST + && GET_CODE (XEXP (operands[2], 0)) == UNSPEC + && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF" +{ + output_asm_insn + ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands); + output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); + return "call\t{*%%rax|rax}"; +} + [(set_attr "type" "multi") + (set_attr "length" "22")]) + (define_expand "tls_local_dynamic_base_64_<mode>" [(parallel [(set (match_operand:P 0 "register_operand") (call:P - (mem:QI (match_operand 1 "constant_call_address_operand")) + (mem:QI (match_operand 1)) (const_int 0))) (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] "TARGET_64BIT") @@ -12629,10 +12791,10 @@ (set_attr "mode" "<MODE>")]) (define_insn "*fop_<mode>_comm_sse" - [(set (match_operand:MODEF 0 "register_operand" "=x,x") + [(set (match_operand:MODEF 0 "register_operand" "=x,v") (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))] + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,v") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]))] "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" @@ -14884,7 +15046,7 @@ [(use (match_operand:SI 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 - && TARGET_C99_FUNCTIONS" + && ix86_libc_has_function (function_c99_misc)" { rtx mask = GEN_INT (0x45); rtx val = GEN_INT (0x05); @@ -14910,7 +15072,7 @@ [(use (match_operand:SI 0 "register_operand")) (use (match_operand:MODEF 1 "nonimmediate_operand"))] "TARGET_USE_FANCY_MATH_387 - && TARGET_C99_FUNCTIONS + && ix86_libc_has_function (function_c99_misc) && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" { rtx mask = GEN_INT (0x45); @@ -15977,10 +16139,10 @@ 
;; are undefined in this condition, we're certain this is correct. (define_insn "<code><mode>3" - [(set (match_operand:MODEF 0 "register_operand" "=x,x") + [(set (match_operand:MODEF 0 "register_operand" "=x,v") (smaxmin:MODEF - (match_operand:MODEF 1 "nonimmediate_operand" "%0,x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:MODEF 1 "nonimmediate_operand" "%0,v") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))] "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" "@ <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9fbf5451e9c..5495c295f57 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -316,6 +316,14 @@ mstack-arg-probe Target Report Mask(STACK_PROBE) Save Enable stack probing +mmemcpy-strategy= +Target RejectNegative Joined Var(ix86_tune_memcpy_strategy) +Specify memcpy expansion strategy when expected size is known + +mmemset-strategy= +Target RejectNegative Joined Var(ix86_tune_memset_strategy) +Specify memset expansion strategy when expected size is known + mstringop-strategy= Target RejectNegative Joined Enum(stringop_alg) Var(ix86_stringop_alg) Init(no_stringop) Chose strategy to generate stringop using @@ -370,6 +378,17 @@ mtune= Target RejectNegative Joined Var(ix86_tune_string) Schedule code for given CPU +mtune-ctrl= +Target RejectNegative Joined Var(ix86_tune_ctrl_string) +Fine grain control of tune features + +mno-default +Target RejectNegative Var(ix86_tune_no_default) Init(0) +Clear all tune features + +mdump-tune-features +Target RejectNegative Var(ix86_dump_tunes) Init(0) + mabi= Target RejectNegative Joined Var(ix86_abi) Enum(calling_abi) Init(SYSV_ABI) Generate code that conforms to the given ABI @@ -498,6 +517,22 @@ mavx2 Target Report Mask(ISA_AVX2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation +mavx512f +Target Report Mask(ISA_AVX512F) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F built-in functions and code generation + +mavx512pf +Target Report Mask(ISA_AVX512PF) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512PF built-in functions and code generation + +mavx512er +Target Report Mask(ISA_AVX512ER) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512ER built-in functions and code generation + +mavx512cd +Target Report Mask(ISA_AVX512CD) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512CD built-in functions and code generation + mfma Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation diff --git a/gcc/config/i386/linux-common.h b/gcc/config/i386/linux-common.h index 1e8bf6b2dc0..52f0baf202e 100644 --- a/gcc/config/i386/linux-common.h +++ b/gcc/config/i386/linux-common.h @@ -40,7 +40,7 @@ along with GCC; see the file COPYING3. 
If not see #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 12c062687c0..17e24999258 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -78,9 +78,9 @@ (define_insn "*mov<mode>_internal" [(set (match_operand:MMXMODE 0 "nonimmediate_operand" - "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,x,x,x,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi") + "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,v,v,v,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi") (match_operand:MMXMODE 1 "vector_move_operand" - "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,x,m,x,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))] + "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,v,m,v,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { @@ -128,6 +128,9 @@ case MODE_TI: return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; + case MODE_V2SF: if (TARGET_AVX && REG_P (operands[0])) return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; @@ -182,7 +185,10 @@ (cond [(eq_attr "alternative" "2") (const_string "SI") (eq_attr "alternative" "11,12,15,16") - (cond [(match_test "<MODE>mode == V2SFmode") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (match_test "<MODE>mode == V2SFmode") (const_string "V4SF") (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index b64ef6999ee..3959c3892e4 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -47,6 +47,12 @@ (and (match_code "reg") (match_test "SSE_REGNO_P (REGNO (op))"))) +;; True if the operand is an AVX-512 new register. +(define_predicate "ext_sse_reg_operand" + (and (match_code "reg") + (match_test "EXT_REX_SSE_REGNO_P (REGNO (op))"))) + + ;; True if the operand is a Q_REGS class register. (define_predicate "q_regs_operand" (match_operand 0 "register_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 439749877f2..9d9469e2c62 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -96,7 +96,7 @@ ]) ;; All vector modes including V?TImode, used in move patterns. -(define_mode_iterator V16 +(define_mode_iterator VMOVE [(V32QI "TARGET_AVX") V16QI (V16HI "TARGET_AVX") V8HI (V8SI "TARGET_AVX") V4SI @@ -244,6 +244,13 @@ (V4SI "vec") (V8SI "avx2") (V2DI "vec") (V4DI "avx2")]) +(define_mode_attr shuffletype + [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i") + (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i") + (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i") + (V32QI "i") (V16HI "u") (V16QI "i") (V8HI "i") + (V64QI "i") (V1TI "i") (V2TI "i")]) + (define_mode_attr ssedoublemode [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V32QI "V32HI") (V16QI "V16HI")]) @@ -301,8 +308,10 @@ ;; SSE instruction mode (define_mode_attr sseinsnmode - [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI") + [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") + (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI") (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") + (V16SF "V16SF") (V8DF "V8DF") (V8SF "V8SF") (V4DF "V4DF") (V4SF "V4SF") (V2DF "V2DF") (TI "TI")]) @@ -435,8 +444,8 @@ ;; This is essential for maintaining stable calling conventions. 
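The 512-bit modes wired into sseinsnmode and the VMOVE iterator can be exercised without intrinsics through GCC's generic vector extension. The sketch below is illustrative only (the type and function names are invented) and assumes a compiler built with these patches and -mavx512f; whole-register moves of such a value map onto the new V16SI/XImode entries, while arithmetic on it may still be split until the full AVX-512 pattern set is in place.

/* Illustrative only: a 512-bit vector via GCC's generic vector extension.
   With -mavx512f the value below occupies a single zmm register, and
   whole-register moves of it go through the mov<mode>/VMOVE expander.  */

typedef int v16si __attribute__ ((vector_size (64)));

v16si
add_v16si (v16si a, v16si b)
{
  /* Element-wise addition on sixteen 32-bit lanes.  */
  return a + b;
}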
(define_expand "mov<mode>" - [(set (match_operand:V16 0 "nonimmediate_operand") - (match_operand:V16 1 "nonimmediate_operand"))] + [(set (match_operand:VMOVE 0 "nonimmediate_operand") + (match_operand:VMOVE 1 "nonimmediate_operand"))] "TARGET_SSE" { ix86_expand_vector_move (<MODE>mode, operands); @@ -444,20 +453,64 @@ }) (define_insn "*mov<mode>_internal" - [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] + [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m") + (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))] "TARGET_SSE && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" { + int mode = get_attr_mode (insn); switch (which_alternative) { case 0: return standard_sse_constant_opcode (insn, operands[1]); case 1: case 2: - switch (get_attr_mode (insn)) + /* There is no evex-encoded vmov* for sizes smaller than 64-bytes + in avx512f, so we need to use workarounds, to access sse registers + 16-31, which are evex-only. */ + if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 + && (EXT_REX_SSE_REGNO_P (REGNO (operands[0])) + || EXT_REX_SSE_REGNO_P (REGNO (operands[1])))) { + if (memory_operand (operands[0], <MODE>mode)) + { + if (GET_MODE_SIZE (<MODE>mode) == 32) + return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; + else if (GET_MODE_SIZE (<MODE>mode) == 16) + return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; + else + gcc_unreachable (); + } + else if (memory_operand (operands[1], <MODE>mode)) + { + if (GET_MODE_SIZE (<MODE>mode) == 32) + return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}"; + else if (GET_MODE_SIZE (<MODE>mode) == 16) + return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}"; + else + gcc_unreachable (); + } + else + /* Reg -> reg move is always aligned. Just use wider move. 
*/ + switch (mode) + { + case MODE_V8SF: + case MODE_V4SF: + return "vmovaps\t{%g1, %g0|%g0, %g1}"; + case MODE_V4DF: + case MODE_V2DF: + return "vmovapd\t{%g1, %g0|%g0, %g1}"; + case MODE_OI: + case MODE_TI: + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; + default: + gcc_unreachable (); + } + } + switch (mode) + { + case MODE_V16SF: case MODE_V8SF: case MODE_V4SF: if (TARGET_AVX @@ -467,6 +520,7 @@ else return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V8DF: case MODE_V4DF: case MODE_V2DF: if (TARGET_AVX @@ -484,6 +538,12 @@ return "vmovdqu\t{%1, %0|%0, %1}"; else return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + if (misaligned_operand (operands[0], <MODE>mode) + || misaligned_operand (operands[1], <MODE>mode)) + return "vmovdqu64\t{%1, %0|%0, %1}"; + else + return "vmovdqa64\t{%1, %0|%0, %1}"; default: gcc_unreachable (); @@ -586,7 +646,7 @@ }) (define_expand "push<mode>1" - [(match_operand:V16 0 "register_operand")] + [(match_operand:VMOVE 0 "register_operand")] "TARGET_SSE" { ix86_expand_push (<MODE>mode, operands[0]); @@ -594,8 +654,8 @@ }) (define_expand "movmisalign<mode>" - [(set (match_operand:V16 0 "nonimmediate_operand") - (match_operand:V16 1 "nonimmediate_operand"))] + [(set (match_operand:VMOVE 0 "nonimmediate_operand") + (match_operand:VMOVE 1 "nonimmediate_operand"))] "TARGET_SSE" { ix86_expand_vector_move_misalign (<MODE>mode, operands); @@ -603,7 +663,7 @@ }) (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF 0 "register_operand" "=x") + [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF [(match_operand:VF 1 "memory_operand" "m")] UNSPEC_LOADU))] @@ -662,7 +722,7 @@ (const_string "<MODE>")))]) (define_insn "<sse2>_loaddqu<avxsizesuffix>" - [(set (match_operand:VI1 0 "register_operand" "=x") + [(set (match_operand:VI1 0 "register_operand" "=v") (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] UNSPEC_LOADU))] "TARGET_SSE2" @@ -696,7 +756,7 @@ (define_insn "<sse2>_storedqu<avxsizesuffix>" [(set (match_operand:VI1 0 "memory_operand" "=m") - (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")] + (unspec:VI1 [(match_operand:VI1 1 "register_operand" "v")] UNSPEC_STOREU))] "TARGET_SSE2" { @@ -863,10 +923,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<plusminus_insn><mode>3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (plusminus:VF - (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} @@ -877,11 +937,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vm<plusminus_insn><mode>3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (plusminus:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -917,11 +977,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vmmul<mode>3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 
(mult:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -960,10 +1020,10 @@ }) (define_insn "<sse>_div<mode>3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (div:VF - (match_operand:VF 1 "register_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "register_operand" "0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE" "@ div<ssemodesuffix>\t{%2, %0|%0, %2} @@ -974,11 +1034,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vmdiv<mode>3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (div:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -1043,8 +1103,8 @@ }) (define_insn "<sse>_sqrt<mode>2" - [(set (match_operand:VF 0 "register_operand" "=x") - (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:VF 0 "register_operand" "=v") + (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))] "TARGET_SSE" "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "sse") @@ -1054,11 +1114,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vmsqrt<mode>2" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (sqrt:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm")) - (match_operand:VF_128 2 "register_operand" "0,x") + (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm")) + (match_operand:VF_128 2 "register_operand" "0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -1124,10 +1184,10 @@ }) (define_insn "*<code><mode>3_finite" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (smaxmin:VF - (match_operand:VF 1 "nonimmediate_operand" "%0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "nonimmediate_operand" "%0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && flag_finite_math_only && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ @@ -1140,10 +1200,10 @@ (set_attr "mode" "<MODE>")]) (define_insn "*<code><mode>3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (smaxmin:VF - (match_operand:VF 1 "register_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "register_operand" "0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && !flag_finite_math_only" "@ <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} @@ -1155,11 +1215,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vm<code><mode>3" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (smaxmin:VF_128 - (match_operand:VF_128 1 "register_operand" "0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 1 "register_operand" "0,v") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm")) 
(match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -1790,10 +1850,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<code><mode>3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (any_logic:VF - (match_operand:VF 1 "nonimmediate_operand" "%0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "nonimmediate_operand" "%0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { static char buf[32]; @@ -2101,11 +2161,11 @@ "TARGET_FMA || TARGET_FMA4") (define_insn "*fma_fmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] "TARGET_FMA || TARGET_FMA4" "@ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} @@ -2113,17 +2173,17 @@ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] "TARGET_FMA || TARGET_FMA4" "@ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} @@ -2131,17 +2191,17 @@ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fnmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] "TARGET_FMA || TARGET_FMA4" "@ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} @@ -2149,18 +2209,18 @@ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfnmadd<ssemodesuffix>\t{%3, 
%2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fnmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] "TARGET_FMA || TARGET_FMA4" "@ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} @@ -2168,7 +2228,7 @@ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) @@ -2193,11 +2253,11 @@ "TARGET_FMA || TARGET_FMA4") (define_insn "*fma_fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")] + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")] UNSPEC_FMADDSUB))] "TARGET_FMA || TARGET_FMA4" "@ @@ -2206,17 +2266,17 @@ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fmsubadd_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x") + (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:VF - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))] + (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))] UNSPEC_FMADDSUB))] "TARGET_FMA || TARGET_FMA4" "@ @@ -2225,7 +2285,7 @@ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "fma,fma,fma,fma4,fma4") + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) @@ -2244,12 +2304,12 @@ "TARGET_FMA") (define_insn "*fmai_fmadd_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 2 "nonimmediate_operand" 
"xm, x") - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v") + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2260,13 +2320,13 @@ (set_attr "mode" "<MODE>")]) (define_insn "*fmai_fmsub_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x") + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v") (neg:VF_128 - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))) + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2277,13 +2337,13 @@ (set_attr "mode" "<MODE>")]) (define_insn "*fmai_fnmadd_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")) (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2294,14 +2354,14 @@ (set_attr "mode" "<MODE>")]) (define_insn "*fmai_fnmsub_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x") + [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) + (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")) (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") (neg:VF_128 - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))) + (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] "TARGET_FMA" @@ -2429,11 +2489,11 @@ (set_attr "mode" "SF")]) (define_insn "sse_cvtsi2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm"))) - (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -2450,11 +2510,11 @@ (set_attr "mode" "SF")]) (define_insn "sse_cvtsi2ssq" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm"))) - (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE && TARGET_64BIT" "@ @@ -2476,7 +2536,7 @@ [(set (match_operand:SI 0 "register_operand" "=r,r") (unspec:SI [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE" @@ -2490,7 +2550,7 @@ (define_insn "sse_cvtss2si_2" [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] + (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE" "%vcvtss2si\t{%1, %0|%0, %k1}" @@ -2506,7 +2566,7 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (unspec:DI [(vec_select:SF 
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE && TARGET_64BIT" @@ -2536,7 +2596,7 @@ [(set (match_operand:SI 0 "register_operand" "=r,r") (fix:SI (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))))] "TARGET_SSE" "%vcvttss2si\t{%1, %0|%0, %k1}" @@ -2552,7 +2612,7 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (fix:DI (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0)]))))] "TARGET_SSE && TARGET_64BIT" "%vcvttss2si{q}\t{%1, %0|%0, %k1}" @@ -2565,9 +2625,9 @@ (set_attr "mode" "DI")]) (define_insn "float<sseintvecmodelower><mode>2" - [(set (match_operand:VF1 0 "register_operand" "=x") + [(set (match_operand:VF1 0 "register_operand" "=v") (float:VF1 - (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))] + (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))] "TARGET_SSE2" "%vcvtdq2ps\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") @@ -2752,7 +2812,7 @@ (define_insn "sse2_cvtsd2si_2" [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] + (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2" "%vcvtsd2si\t{%1, %0|%0, %q1}" @@ -2782,7 +2842,7 @@ (define_insn "sse2_cvtsd2siq_2" [(set (match_operand:DI 0 "register_operand" "=r,r") - (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] + (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2 && TARGET_64BIT" "%vcvtsd2si{q}\t{%1, %0|%0, %q1}" @@ -2981,12 +3041,12 @@ (set_attr "mode" "TI")]) (define_insn "sse2_cvtsd2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF (float_truncate:V2SF - (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm"))) - (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm"))) + (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE2" "@ @@ -3003,13 +3063,13 @@ (set_attr "mode" "SF")]) (define_insn "sse2_cvtss2sd" - [(set (match_operand:V2DF 0 "register_operand" "=x,x,x") + [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") (vec_merge:V2DF (float_extend:V2DF (vec_select:V2SF - (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm") (parallel [(const_int 0) (const_int 1)]))) - (match_operand:V2DF 1 "register_operand" "0,0,x") + (match_operand:V2DF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE2" "@ @@ -5243,10 +5303,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<plusminus_insn><mode>3" - [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v") (plusminus:VI_AVX2 - (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x") - (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v") + (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} @@ -5266,10 +5326,10 @@ 
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3" - [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v") (sat_plusminus:VI12_AVX2 - (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x") - (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v") + (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} @@ -5641,10 +5701,10 @@ }) (define_insn "*<sse4_1_avx2>_mul<mode>3" - [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") (mult:VI4_AVX2 - (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x") - (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,v") + (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" "@ pmulld\t{%2, %0|%0, %2} @@ -5765,9 +5825,9 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<shift_insn><mode>3" - [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,v") (any_lshift:VI248_AVX2 - (match_operand:VI248_AVX2 1 "register_operand" "0,x") + (match_operand:VI248_AVX2 1 "register_operand" "0,v") (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] "TARGET_SSE2" "@ @@ -5868,10 +5928,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*avx2_<code><mode>3" - [(set (match_operand:VI124_256 0 "register_operand" "=x") + [(set (match_operand:VI124_256 0 "register_operand" "=v") (maxmin:VI124_256 - (match_operand:VI124_256 1 "nonimmediate_operand" "%x") - (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))] + (match_operand:VI124_256 1 "nonimmediate_operand" "%v") + (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))] "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") @@ -6345,10 +6405,10 @@ "TARGET_SSE2") (define_insn "*andnot<mode>3" - [(set (match_operand:VI 0 "register_operand" "=x,x") + [(set (match_operand:VI 0 "register_operand" "=x,v") (and:VI - (not:VI (match_operand:VI 1 "register_operand" "0,x")) - (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] + (not:VI (match_operand:VI 1 "register_operand" "0,v")) + (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE" { static char buf[32]; @@ -6429,10 +6489,10 @@ }) (define_insn "*<code><mode>3" - [(set (match_operand:VI 0 "register_operand" "=x,x") + [(set (match_operand:VI 0 "register_operand" "=x,v") (any_logic:VI - (match_operand:VI 1 "nonimmediate_operand" "%0,x") - (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VI 1 "nonimmediate_operand" "%0,v") + (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { @@ -7731,9 +7791,17 @@ (mem:V16QI (match_dup 0))] UNSPEC_MASKMOV))] "TARGET_SSE2" - "%vmaskmovdqu\t{%2, %1|%1, %2}" +{ + /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing + that requires %v to be at the beginning of the opcode name. 
*/ + if (Pmode != word_mode) + fputs ("\taddr32", asm_out_file); + return "%vmaskmovdqu\t{%2, %1|%1, %2}"; +} [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") + (set (attr "length_address") + (symbol_ref ("Pmode != word_mode"))) ;; The implicit %rdi operand confuses default length_vex computation. (set (attr "length_vex") (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) @@ -7781,26 +7849,18 @@ "mwait" [(set_attr "length" "3")]) -(define_insn "sse3_monitor" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") - (match_operand:SI 1 "register_operand" "c") - (match_operand:SI 2 "register_operand" "d")] - UNSPECV_MONITOR)] - "TARGET_SSE3 && !TARGET_64BIT" - "monitor\t%0, %1, %2" - [(set_attr "length" "3")]) - -(define_insn "sse3_monitor64_<mode>" +(define_insn "sse3_monitor_<mode>" [(unspec_volatile [(match_operand:P 0 "register_operand" "a") (match_operand:SI 1 "register_operand" "c") (match_operand:SI 2 "register_operand" "d")] UNSPECV_MONITOR)] - "TARGET_SSE3 && TARGET_64BIT" + "TARGET_SSE3" ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in ;; RCX and RDX are used. Since 32bit register operands are implicitly ;; zero extended to 64bit, we only need to set up 32bit registers. - "monitor" - [(set_attr "length" "3")]) + "%^monitor" + [(set (attr "length") + (symbol_ref ("(Pmode != word_mode) + 3")))]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -8368,9 +8428,9 @@ (set_attr "mode" "DI")]) (define_insn "abs<mode>2" - [(set (match_operand:VI124_AVX2 0 "register_operand" "=x") + [(set (match_operand:VI124_AVX2 0 "register_operand" "=v") (abs:VI124_AVX2 - (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))] + (match_operand:VI124_AVX2 1 "nonimmediate_operand" "vm")))] "TARGET_SSSE3" "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") @@ -10481,10 +10541,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx2_permvar<mode>" - [(set (match_operand:VI4F_256 0 "register_operand" "=x") + [(set (match_operand:VI4F_256 0 "register_operand" "=v") (unspec:VI4F_256 - [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm") - (match_operand:V8SI 2 "register_operand" "x")] + [(match_operand:VI4F_256 1 "nonimmediate_operand" "vm") + (match_operand:V8SI 2 "register_operand" "v")] UNSPEC_VPERMVAR))] "TARGET_AVX2" "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" @@ -10508,9 +10568,9 @@ }) (define_insn "avx2_perm<mode>_1" - [(set (match_operand:VI8F_256 0 "register_operand" "=x") + [(set (match_operand:VI8F_256 0 "register_operand" "=v") (vec_select:VI8F_256 - (match_operand:VI8F_256 1 "nonimmediate_operand" "xm") + (match_operand:VI8F_256 1 "nonimmediate_operand" "vm") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -10735,9 +10795,9 @@ }) (define_insn "*avx_vpermilp<mode>" - [(set (match_operand:VF 0 "register_operand" "=x") + [(set (match_operand:VF 0 "register_operand" "=v") (vec_select:VF - (match_operand:VF 1 "nonimmediate_operand" "xm") + (match_operand:VF 1 "nonimmediate_operand" "vm") (match_parallel 2 "" [(match_operand 3 "const_int_operand")])))] "TARGET_AVX @@ -10754,10 +10814,10 @@ (set_attr "mode" "<MODE>")]) (define_insn "avx_vpermilvar<mode>3" - [(set (match_operand:VF 0 "register_operand" "=x") + [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF - [(match_operand:VF 1 "register_operand" "x") - (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")] + [(match_operand:VF 1 
"register_operand" "v") + (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")] UNSPEC_VPERMIL))] "TARGET_AVX" "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" @@ -11149,10 +11209,10 @@ }) (define_insn "avx2_ashrv<mode>" - [(set (match_operand:VI4_AVX2 0 "register_operand" "=x") + [(set (match_operand:VI4_AVX2 0 "register_operand" "=v") (ashiftrt:VI4_AVX2 - (match_operand:VI4_AVX2 1 "register_operand" "x") - (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))] + (match_operand:VI4_AVX2 1 "register_operand" "v") + (match_operand:VI4_AVX2 2 "nonimmediate_operand" "vm")))] "TARGET_AVX2" "vpsravd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") @@ -11160,10 +11220,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx2_<shift_insn>v<mode>" - [(set (match_operand:VI48_AVX2 0 "register_operand" "=x") + [(set (match_operand:VI48_AVX2 0 "register_operand" "=v") (any_lshift:VI48_AVX2 - (match_operand:VI48_AVX2 1 "register_operand" "x") - (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))] + (match_operand:VI48_AVX2 1 "register_operand" "v") + (match_operand:VI48_AVX2 2 "nonimmediate_operand" "vm")))] "TARGET_AVX2" "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") diff --git a/gcc/config/i386/stringop.def b/gcc/config/i386/stringop.def new file mode 100644 index 00000000000..1a7d1e88f65 --- /dev/null +++ b/gcc/config/i386/stringop.def @@ -0,0 +1,37 @@ +/* Definitions for stringop strategy for IA-32. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the files COPYING3. If not, +see <http://www.gnu.org/licenses/>. */ + +DEF_ENUM +DEF_ALG (no_stringop, no_stringop) +DEF_ENUM +DEF_ALG (libcall, libcall) +DEF_ENUM +DEF_ALG (rep_prefix_1_byte, rep_byte) +DEF_ENUM +DEF_ALG (rep_prefix_4_byte, rep_4byte) +DEF_ENUM +DEF_ALG (rep_prefix_8_byte, rep_8byte) +DEF_ENUM +DEF_ALG (loop_1_byte, byte_loop) +DEF_ENUM +DEF_ALG (loop, loop) +DEF_ENUM +DEF_ALG (unrolled_loop, unrolled_loop) +DEF_ENUM +DEF_ALG (vector_loop, vector_loop) diff --git a/gcc/config/i386/stringop.opt b/gcc/config/i386/stringop.opt new file mode 100644 index 00000000000..5c5fc906a33 --- /dev/null +++ b/gcc/config/i386/stringop.opt @@ -0,0 +1,31 @@ +/* Definitions for stringop option handling for IA-32. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the files COPYING3. If not, +see <http://www.gnu.org/licenses/>. 
*/ + +Enum(stringop_alg) String(rep_byte) Value(rep_prefix_1_byte) + +#undef DEF_ENUM +#define DEF_ENUM EnumValue + +#undef DEF_ALG +#define DEF_ALG(alg, name) Enum(stringop_alg) String(name) Value(alg) + +#include "stringop.def" + +#undef DEF_ENUM +#undef DEF_ALG diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 index 3a77e14f5ca..07624cc575e 100644 --- a/gcc/config/i386/t-i386 +++ b/gcc/config/i386/t-i386 @@ -24,7 +24,7 @@ i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h $(TM_H) \ $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \ $(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \ i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H) \ - $(OPTS_H) $(DIAGNOSTIC_H) $(COMMON_TARGET_H) + $(OPTS_H) $(DIAGNOSTIC_H) $(COMMON_TARGET_H) $(CONTEXT_H) $(PASS_MANAGER_H) i386-c.o: $(srcdir)/config/i386/i386-c.c \ $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \ diff --git a/gcc/config/i386/x86-64.h b/gcc/config/i386/x86-64.h index 336343927c8..0c62723ae22 100644 --- a/gcc/config/i386/x86-64.h +++ b/gcc/config/i386/x86-64.h @@ -103,3 +103,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #undef TARGET_ASM_UNIQUE_SECTION #define TARGET_ASM_UNIQUE_SECTION x86_64_elf_unique_section + +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS x86_64_elf_section_type_flags diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def new file mode 100644 index 00000000000..e3a34ee7b2e --- /dev/null +++ b/gcc/config/i386/x86-tune.def @@ -0,0 +1,232 @@ +/* Definitions of x86 tunable features. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results + negatively, so enabling for Generic64 seems like good code size + tradeoff. We can't enable it for 32bit generic because it does not + work well with PPro base chips. */ +DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", + m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64) +DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory", + m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE + | m_GENERIC) +DEF_TUNE (X86_TUNE_ZERO_EXTEND_WITH_AND, "zero_extend_with_and", m_486 | m_PENT) +DEF_TUNE (X86_TUNE_UNROLL_STRLEN, "unroll_strlen", + m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 + | m_AMD_MULTIPLE | m_GENERIC) +/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based + on simulation result. But after P4 was made, no performance benefit + was observed with branch hints. It also increases the code size. + As a result, icc never generates branch hints. 
*/ +DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", 0) +DEF_TUNE (X86_TUNE_DOUBLE_WITH_ADD, "double_with_add", ~m_386) +DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE + | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC) +/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid + partial dependencies. */ +DEF_TUNE (X86_TUNE_MOVX, "movx", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE + | m_AMD_MULTIPLE | m_GENERIC) +/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial + register stalls on Generic32 compilation setting as well. However + in current implementation the partial register stalls are not eliminated + very well - they can be introduced via subregs synthesized by combine + and can happen in caller/callee saving sequences. Because this option + pays back little on PPro based chips and is in conflict with partial reg + dependencies used by Athlon/P4 based chips, it is better to leave it off + for generic32 for now. */ +DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall", m_PPRO) +DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", + m_CORE_ALL | m_GENERIC) +/* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall + * on 16-bit immediate moves into memory on Core2 and Corei7. */ +DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_GENERIC) +DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop", + m_386 | m_486 | m_K6_GEODE) +DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", + ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM + | m_SLM | m_AMD_MULTIPLE | m_GENERIC)) +DEF_TUNE (X86_TUNE_USE_MOV0, "use_mov0", m_K6) +DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", ~(m_PENT | m_ATOM | m_SLM | m_K6)) +/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ +DEF_TUNE (X86_TUNE_USE_XCHGB, "use_xchgb", m_PENT4) +DEF_TUNE (X86_TUNE_SPLIT_LONG_MOVES, "split_long_moves", m_PPRO) +DEF_TUNE (X86_TUNE_READ_MODIFY_WRITE, "read_modify_write", ~m_PENT) +DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_PPRO)) +DEF_TUNE (X86_TUNE_PROMOTE_QIMODE, "promote_qimode", + m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM + | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_FAST_PREFIX, "fast_prefix", ~(m_386 | m_486 | m_PENT)) +DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA) +DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0) +/* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial + register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option + might be considered for Generic32 if our scheme for avoiding partial + stalls was more effective. */ +DEF_TUNE (X86_TUNE_HIMODE_MATH, "himode_math", ~m_PPRO) +DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0) +DEF_TUNE (X86_TUNE_PROMOTE_HI_REGS, "promote_hi_regs", m_PPRO) +/* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred + over esp addition. */ +DEF_TUNE (X86_TUNE_SINGLE_POP, "single_pop", m_386 | m_486 | m_PENT | m_PPRO) +/* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred + over esp addition. */ +DEF_TUNE (X86_TUNE_DOUBLE_POP, "double_pop", m_PENT) +/* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred + over esp subtraction. */ +DEF_TUNE (X86_TUNE_SINGLE_PUSH, "single_push", m_386 | m_486 | m_PENT + | m_K6_GEODE) +/* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred + over esp subtraction. 
*/ +DEF_TUNE (X86_TUNE_DOUBLE_PUSH, "double_push", m_PENT | m_K6_GEODE) +/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred + for DFmode copies */ +DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", + ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM + | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) +DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE + | m_GENERIC) +/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a + conflict here in between PPro/Pentium4 based chips that thread 128bit + SSE registers as single units versus K8 based chips that divide SSE + registers to two 64bit halves. This knob promotes all store destinations + to be 128bit to allow register renaming on 128bit SSE units, but usually + results in one extra microop on 64bit SSE units. Experimental results + shows that disabling this option on P4 brings over 20% SPECfp regression, + while enabling it on K8 brings roughly 2.4% regression that can be partly + masked by careful scheduling of moves. */ +DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 + | m_BDVER | m_GENERIC) +DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", + m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM) +DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", + m_COREI7 | m_BDVER | m_SLM) +DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", + m_BDVER) +/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies + are resolved on SSE register parts instead of whole registers, so we may + maintain just lower part of scalar values in proper format leaving the + upper part undefined. */ +DEF_TUNE (X86_TUNE_SSE_SPLIT_REGS, "sse_split_regs", m_ATHLON_K8) +DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", m_AMD_MULTIPLE) +DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", m_PPRO | m_P4_NOCONA) +DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_PROLOGUE_USING_MOVE, "prologue_using_move", + m_PPRO | m_ATHLON_K8) +DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move", + m_PPRO | m_ATHLON_K8) +DEF_TUNE (X86_TUNE_SHIFT1, "shift1", ~m_486) +DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) +DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_TO_VEC, "inter_unit_moves_to_vec", + ~(m_AMD_MULTIPLE | m_GENERIC)) +DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec", + ~m_ATHLON_K8) +DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", + ~(m_AMDFAM10 | m_BDVER )) +/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more + than 4 branch instructions in the 16 byte window. 
*/ +DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM| m_AMD_MULTIPLE + | m_GENERIC) +DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", + m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE + | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_USE_BT, "use_bt", + m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", + ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC)) +DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", + m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC) +DEF_TUNE (X86_TUNE_PAD_SHORT_FUNCTION, "pad_short_function", m_ATOM) +DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE + | m_ATHLON_K8 | m_GENERIC) +DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode", + m_CORE_ALL | m_K8 | m_GENERIC64) +/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode + and SImode multiply, but 386 and 486 do HImode multiply faster. */ +DEF_TUNE (X86_TUNE_PROMOTE_HIMODE_IMUL, "promote_himode_imul", + ~(m_386 | m_486)) +/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is + vector path on AMD machines. */ +DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM32_MEM, "slow_imul_imm32_mem", + m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64) +/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD + machines. */ +DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8", + m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64) +/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR + than a MOV. */ +DEF_TUNE (X86_TUNE_MOVE_M1_VIA_OR, "move_m1_via_or", m_PENT) +/* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is, + but one byte longer. */ +DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable", m_PENT) +/* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory + operand that cannot be represented using a modRM byte. The XOR + replacement is long decoded, so this split helps here as well. */ +DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6) +/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion + from FP to FP. */ +DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts", + m_CORE_ALL | m_AMDFAM10 | m_GENERIC) +/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion + from integer to FP. */ +DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) +/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction + with a subsequent conditional jump instruction into a single + compare-and-branch uop. */ +DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER) +/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag + will impact LEA instruction selection. */ +DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM) +/* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector + instructions. */ +DEF_TUNE (X86_TUNE_VECTORIZE_DOUBLE, "vectorize_double", ~m_ATOM) +/* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching + at -O3. For the moment, the prefetching seems badly tuned for Intel + chips. */ +DEF_TUNE (X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, "software_prefetching_beneficial", + m_K6_GEODE | m_AMD_MULTIPLE) +/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for + the auto-vectorizer. 
*/ +DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2) +/* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations + during reassociation of integer computation. */ +DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel", + m_ATOM) +/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations + during reassociation of fp computation. */ +DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel", + m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2) +/* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE + regs instead of memory. */ +DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", + m_CORE_ALL) +/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for + a conditional move. */ +DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", m_ATOM) +/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for + fp converts to destination register. */ +DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts", + m_SLM) diff --git a/gcc/config/ia64/hpux.h b/gcc/config/ia64/hpux.h index 22cfe9f6677..ca592e4bc26 100644 --- a/gcc/config/ia64/hpux.h +++ b/gcc/config/ia64/hpux.h @@ -179,9 +179,10 @@ do { \ #undef TARGET_ASM_RELOC_RW_MASK #define TARGET_ASM_RELOC_RW_MASK ia64_hpux_reloc_rw_mask -/* ia64 HPUX has the float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 +/* ia64 HPUX has the float and long double forms of math functions. + We redefine this hook so the version from elfos.h header won't be used. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION default_c99_libc_has_function #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS ia64_hpux_init_libfuncs diff --git a/gcc/config/linux-android.c b/gcc/config/linux-android.c index d6e47a70e7a..4a4b48d9882 100644 --- a/gcc/config/linux-android.c +++ b/gcc/config/linux-android.c @@ -31,3 +31,17 @@ linux_android_has_ifunc_p (void) { return TARGET_ANDROID ? false : HAVE_GNU_INDIRECT_FUNCTION; } + +bool +linux_android_libc_has_function (enum function_class fn_class) +{ + if (OPTION_GLIBC) + return true; + if (OPTION_BIONIC) + if (fn_class == function_c94 + || fn_class == function_c99_misc + || fn_class == function_sincos) + return true; + + return false; +} diff --git a/gcc/config/linux-protos.h b/gcc/config/linux-protos.h index 3f926e5dffd..d1f0f926367 100644 --- a/gcc/config/linux-protos.h +++ b/gcc/config/linux-protos.h @@ -19,3 +19,5 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ extern bool linux_android_has_ifunc_p (void); + +extern bool linux_android_libc_has_function (enum function_class fn_class); diff --git a/gcc/config/linux.h b/gcc/config/linux.h index 2be1079b92f..8116e698d94 100644 --- a/gcc/config/linux.h +++ b/gcc/config/linux.h @@ -95,15 +95,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \ BIONIC_DYNAMIC_LINKERX32) -/* Determine whether the entire c99 runtime - is present in the runtime library. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. 
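The new TARGET_LIBC_HAS_FUNCTION hook folds the old TARGET_C99_FUNCTIONS and TARGET_HAS_SINCOS macros into a single query keyed on a function class.  A hedged sketch of the caller side — the function classes are the ones tested by linux_android_libc_has_function above, while the surrounding logic is purely illustrative:

    /* Illustrative only: middle-end code asks the target hook whether
       libc provides a given class of functions, instead of testing the
       old TARGET_C99_FUNCTIONS / TARGET_HAS_SINCOS macros.  */
    if (targetm.libc_has_function (function_c99_misc))
      {
        /* Safe to emit calls to the full C99 runtime, e.g. the float
           and long double math variants.  */
      }
    if (targetm.libc_has_function (function_sincos))
      {
        /* A sin/cos pair on the same argument may be combined into a
           single sincos-style call.  */
      }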
*/ -#undef TARGET_HAS_SINCOS -#define TARGET_HAS_SINCOS (OPTION_GLIBC || OPTION_BIONIC) - /* Whether we have Bionic libc runtime */ #undef TARGET_HAS_BIONIC #define TARGET_HAS_BIONIC (OPTION_BIONIC) + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function diff --git a/gcc/config/lm32/uclinux-elf.h b/gcc/config/lm32/uclinux-elf.h index 3a556d7258d..a5e8163cf6f 100644 --- a/gcc/config/lm32/uclinux-elf.h +++ b/gcc/config/lm32/uclinux-elf.h @@ -77,3 +77,5 @@ #undef CC1_SPEC #define CC1_SPEC "%{G*} %{!fno-PIC:-fPIC}" +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/m68k/uclinux.h b/gcc/config/m68k/uclinux.h index 8d743126547..b1af7d2c585 100644 --- a/gcc/config/m68k/uclinux.h +++ b/gcc/config/m68k/uclinux.h @@ -67,3 +67,6 @@ along with GCC; see the file COPYING3. If not see sections. */ #undef M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P #define M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/microblaze/microblaze.h b/gcc/config/microblaze/microblaze.h index bc4d9a128d1..eb8e45ce17b 100644 --- a/gcc/config/microblaze/microblaze.h +++ b/gcc/config/microblaze/microblaze.h @@ -892,6 +892,10 @@ do { \ %{pg:-start-group -lxilprofile -lgloss -lxil -lc -lm -end-group } \ %{!pg:-start-group -lgloss -lxil -lc -lm -end-group }} " +/* microblaze-unknown-elf target has no support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + #undef ENDFILE_SPEC #define ENDFILE_SPEC "crtend.o%s crtn.o%s" diff --git a/gcc/config/mips/linux-common.h b/gcc/config/mips/linux-common.h index ca4ea0705a6..ebc67a28d90 100644 --- a/gcc/config/mips/linux-common.h +++ b/gcc/config/mips/linux-common.h @@ -44,7 +44,7 @@ along with GCC; see the file COPYING3. If not see #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ diff --git a/gcc/config/mips/linux.h b/gcc/config/mips/linux.h index 9b4c68db6ee..6736295eb36 100644 --- a/gcc/config/mips/linux.h +++ b/gcc/config/mips/linux.h @@ -17,4 +17,9 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" +#define GLIBC_DYNAMIC_LINKER \ + "%{mnan=2008:/lib/ld-linux-mipsn8.so.1;:/lib/ld.so.1}" + +#undef UCLIBC_DYNAMIC_LINKER +#define UCLIBC_DYNAMIC_LINKER \ + "%{mnan=2008:/lib/ld-uClibc-mipsn8.so.0;:/lib/ld-uClibc.so.0}" diff --git a/gcc/config/mips/linux64.h b/gcc/config/mips/linux64.h index dbba47a1a13..421a53a1085 100644 --- a/gcc/config/mips/linux64.h +++ b/gcc/config/mips/linux64.h @@ -22,10 +22,22 @@ along with GCC; see the file COPYING3. 
If not see #define GNU_USER_LINK_EMULATION64 "elf64%{EB:b}%{EL:l}tsmip" #define GNU_USER_LINK_EMULATIONN32 "elf32%{EB:b}%{EL:l}tsmipn32" -#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1" -#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld.so.1" -#define GLIBC_DYNAMIC_LINKERN32 "/lib32/ld.so.1" -#define UCLIBC_DYNAMIC_LINKERN32 "/lib32/ld-uClibc.so.0" +#define GLIBC_DYNAMIC_LINKER32 \ + "%{mnan=2008:/lib/ld-linux-mipsn8.so.1;:/lib/ld.so.1}" +#define GLIBC_DYNAMIC_LINKER64 \ + "%{mnan=2008:/lib64/ld-linux-mipsn8.so.1;:/lib64/ld.so.1}" +#define GLIBC_DYNAMIC_LINKERN32 \ + "%{mnan=2008:/lib32/ld-linux-mipsn8.so.1;:/lib32/ld.so.1}" + +#undef UCLIBC_DYNAMIC_LINKER32 +#define UCLIBC_DYNAMIC_LINKER32 \ + "%{mnan=2008:/lib/ld-uClibc-mipsn8.so.0;:/lib/ld-uClibc.so.0}" +#undef UCLIBC_DYNAMIC_LINKER64 +#define UCLIBC_DYNAMIC_LINKER64 \ + "%{mnan=2008:/lib/ld64-uClibc-mipsn8.so.0;:/lib/ld64-uClibc.so.0}" +#define UCLIBC_DYNAMIC_LINKERN32 \ + "%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}" + #define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32" #define GNU_USER_DYNAMIC_LINKERN32 \ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \ diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def index ecb7f181d8f..383d2cb6d43 100644 --- a/gcc/config/mips/mips-modes.def +++ b/gcc/config/mips/mips-modes.def @@ -17,12 +17,7 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* MIPS has a quirky almost-IEEE format for all its - floating point. */ -RESET_FLOAT_FORMAT (SF, mips_single_format); -RESET_FLOAT_FORMAT (DF, mips_double_format); - -FLOAT_MODE (TF, 16, mips_quad_format); +FLOAT_MODE (TF, 16, ieee_quad_format); /* Vector modes. */ VECTOR_MODES (INT, 4); /* V4QI V2HI */ diff --git a/gcc/config/mips/mips-opts.h b/gcc/config/mips/mips-opts.h index dbfcfad0b04..56249d94c4e 100644 --- a/gcc/config/mips/mips-opts.h +++ b/gcc/config/mips/mips-opts.h @@ -27,6 +27,13 @@ enum mips_code_readable_setting { CODE_READABLE_YES }; +/* Enumerates the setting of the -mabs and -mnan options. */ +enum mips_ieee_754_setting { + MIPS_IEEE_754_DEFAULT, + MIPS_IEEE_754_LEGACY, + MIPS_IEEE_754_2008 +}; + /* Enumerates the setting of the -mr10k-cache-barrier option. */ enum mips_r10k_cache_barrier_setting { R10K_CACHE_BARRIER_NONE, diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index a56757c877f..5993aabe578 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #include "target-globals.h" #include "opts.h" #include "tree-pass.h" +#include "context.h" /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ #define UNSPEC_ADDRESS_P(X) \ @@ -8843,6 +8844,11 @@ mips_file_start (void) fprintf (asm_out_file, "\t.section .gcc_compiled_long%d\n" "\t.previous\n", TARGET_LONG64 ? 64 : 32); + /* Record the NaN encoding. */ + if (HAVE_AS_NAN || mips_nan != MIPS_IEEE_754_DEFAULT) + fprintf (asm_out_file, "\t.nan\t%s\n", + mips_nan == MIPS_IEEE_754_2008 ? "2008" : "legacy"); + #ifdef HAVE_AS_GNU_ATTRIBUTE { int attr; @@ -12291,6 +12297,7 @@ mips_adjust_insn_length (rtx insn, int length) /* mips.md uses MAX_PIC_BRANCH_LENGTH as a placeholder for the length of a PIC long-branch sequence. Substitute the correct value. 
*/ if (length == MAX_PIC_BRANCH_LENGTH + && JUMP_P (insn) && INSN_CODE (insn) >= 0 && get_attr_type (insn) == TYPE_BRANCH) { @@ -12312,7 +12319,9 @@ mips_adjust_insn_length (rtx insn, int length) length += TARGET_MIPS16 ? 2 : 4; /* See how many nops might be needed to avoid hardware hazards. */ - if (!cfun->machine->ignore_hazard_length_p && INSN_CODE (insn) >= 0) + if (!cfun->machine->ignore_hazard_length_p + && INSN_P (insn) + && INSN_CODE (insn) >= 0) switch (get_attr_hazard (insn)) { case HAZARD_NONE: @@ -16332,33 +16341,43 @@ mips_machine_reorg2 (void) return 0; } -struct rtl_opt_pass pass_mips_machine_reorg2 = -{ - { - RTL_PASS, - "mach2", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - NULL, /* gate */ - mips_machine_reorg2, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_MACH_DEP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } -}; +namespace { -struct register_pass_info insert_pass_mips_machine_reorg2 = +const pass_data pass_data_mips_machine_reorg2 = { - &pass_mips_machine_reorg2.pass, /* pass */ - "dbr", /* reference_pass_name */ - 1, /* ref_pass_instance_number */ - PASS_POS_INSERT_AFTER /* po_op */ + RTL_PASS, /* type */ + "mach2", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; + +class pass_mips_machine_reorg2 : public rtl_opt_pass +{ +public: + pass_mips_machine_reorg2(gcc::context *ctxt) + : rtl_opt_pass(pass_data_mips_machine_reorg2, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return mips_machine_reorg2 (); } + +}; // class pass_mips_machine_reorg2 + +} // anon namespace + +rtl_opt_pass * +make_pass_mips_machine_reorg2 (gcc::context *ctxt) +{ + return new pass_mips_machine_reorg2 (ctxt); +} + /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text in order to avoid duplicating too much logic from elsewhere. */ @@ -16980,6 +16999,15 @@ mips_option_override (void) } } + /* Pre-IEEE 754-2008 MIPS hardware has a quirky almost-IEEE format + for all its floating point. */ + if (mips_nan != MIPS_IEEE_754_2008) + { + REAL_MODE_FORMAT (SFmode) = &mips_single_format; + REAL_MODE_FORMAT (DFmode) = &mips_double_format; + REAL_MODE_FORMAT (TFmode) = &mips_quad_format; + } + /* Make sure that the user didn't turn off paired single support when MIPS-3D support is requested. */ if (TARGET_MIPS3D @@ -17143,6 +17171,14 @@ mips_option_override (void) /* We register a second machine specific reorg pass after delay slot filling. Registering the pass must be done at start up. It's convenient to do it here. 
*/ + opt_pass *new_pass = make_pass_mips_machine_reorg2 (g); + struct register_pass_info insert_pass_mips_machine_reorg2 = + { + new_pass, /* pass */ + "dbr", /* reference_pass_name */ + 1, /* ref_pass_instance_number */ + PASS_POS_INSERT_AFTER /* po_op */ + }; register_pass (&insert_pass_mips_machine_reorg2); if (TARGET_HARD_FLOAT_ABI && TARGET_MIPS5900) diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 2dcccd48b8f..af7eeee6682 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -507,6 +507,12 @@ struct mips_cpu_info { if (TARGET_PAIRED_SINGLE_FLOAT) \ builtin_define ("__mips_paired_single_float"); \ \ + if (mips_abs == MIPS_IEEE_754_2008) \ + builtin_define ("__mips_abs2008"); \ + \ + if (mips_nan == MIPS_IEEE_754_2008) \ + builtin_define ("__mips_nan2008"); \ + \ if (TARGET_BIG_ENDIAN) \ { \ builtin_define_std ("MIPSEB"); \ @@ -743,6 +749,7 @@ struct mips_cpu_info { --with-abi is ignored if -mabi is specified. --with-float is ignored if -mhard-float or -msoft-float are specified. + --with-nan is ignored if -mnan is specified. --with-divide is ignored if -mdivide-traps or -mdivide-breaks are specified. */ #define OPTION_DEFAULT_SPECS \ @@ -755,6 +762,7 @@ struct mips_cpu_info { {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \ {"fpu", "%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}" }, \ + {"nan", "%{!mnan=*:-mnan=%(VALUE)}" }, \ {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \ {"llsc", "%{!mllsc:%{!mno-llsc:-m%(VALUE)}}" }, \ {"mips-plt", "%{!mplt:%{!mno-plt:-m%(VALUE)}}" }, \ @@ -1160,7 +1168,7 @@ struct mips_cpu_info { %(subtarget_asm_debugging_spec) \ %{mabi=*} %{!mabi=*: %(asm_abi_default_spec)} \ %{mgp32} %{mgp64} %{march=*} %{mxgot:-xgot} \ -%{mfp32} %{mfp64} \ +%{mfp32} %{mfp64} %{mnan=*} \ %{mshared} %{mno-shared} \ %{msym32} %{mno-sym32} \ %{mtune=*} \ @@ -2897,6 +2905,10 @@ while (0) #define HAVE_AS_TLS 0 #endif +#ifndef HAVE_AS_NAN +#define HAVE_AS_NAN 0 +#endif + #ifndef USED_FOR_TARGET /* Information about ".set noFOO; ...; .set FOO" blocks. */ struct mips_asm_switch { diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index ca79a31e29a..0cda169224f 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -2711,14 +2711,15 @@ ;; Do not use the integer abs macro instruction, since that signals an ;; exception on -2147483648 (sigh). -;; abs.fmt is an arithmetic instruction and treats all NaN inputs as -;; invalid; it does not clear their sign bits. We therefore can't use -;; abs.fmt if the signs of NaNs matter. +;; The "legacy" (as opposed to "2008") form of ABS.fmt is an arithmetic +;; instruction that treats all NaN inputs as invalid; it does not clear +;; their sign bit. We therefore can't use that form if the signs of +;; NaNs matter. (define_insn "abs<mode>2" [(set (match_operand:ANYF 0 "register_operand" "=f") (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] - "!HONOR_NANS (<MODE>mode)" + "mips_abs == MIPS_IEEE_754_2008 || !HONOR_NANS (<MODE>mode)" "abs.<fmt>\t%0,%1" [(set_attr "type" "fabs") (set_attr "mode" "<UNITMODE>")]) @@ -2793,14 +2794,15 @@ [(set_attr "alu_type" "sub") (set_attr "mode" "DI")]) -;; neg.fmt is an arithmetic instruction and treats all NaN inputs as -;; invalid; it does not flip their sign bit. We therefore can't use -;; neg.fmt if the signs of NaNs matter. 
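The MIPS conversion above (and the RL78 one further down) follow the same recipe for the new C++ pass manager: a pass_data descriptor replaces the old statically initialized struct rtl_opt_pass, the gate/execute callbacks become methods of an rtl_opt_pass subclass, a make_pass_* factory builds the instance, and registration happens at option-override or file-start time through register_pass_info.  A condensed sketch of the pattern, with illustrative names only:

    namespace {

    const pass_data pass_data_example =
    {
      RTL_PASS,       /* type */
      "example",      /* name */
      OPTGROUP_NONE,  /* optinfo_flags */
      false,          /* has_gate */
      true,           /* has_execute */
      TV_MACH_DEP,    /* tv_id */
      0,              /* properties_required */
      0,              /* properties_provided */
      0,              /* properties_destroyed */
      0,              /* todo_flags_start */
      0,              /* todo_flags_finish */
    };

    class pass_example : public rtl_opt_pass
    {
    public:
      pass_example (gcc::context *ctxt)
        : rtl_opt_pass (pass_data_example, ctxt) {}

      /* opt_pass methods: */
      unsigned int execute () { /* do the real work here */ return 0; }
    };

    } // anon namespace

    static void
    register_example_pass (void)
    {
      struct register_pass_info info
        = { new pass_example (g), "dbr", 1, PASS_POS_INSERT_AFTER };
      register_pass (&info);
    }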
+;; The "legacy" (as opposed to "2008") form of NEG.fmt is an arithmetic +;; instruction that treats all NaN inputs as invalid; it does not flip +;; their sign bit. We therefore can't use that form if the signs of +;; NaNs matter. (define_insn "neg<mode>2" [(set (match_operand:ANYF 0 "register_operand" "=f") (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")))] - "!HONOR_NANS (<MODE>mode)" + "mips_abs == MIPS_IEEE_754_2008 || !HONOR_NANS (<MODE>mode)" "neg.<fmt>\t%0,%1" [(set_attr "type" "fneg") (set_attr "mode" "<UNITMODE>")]) @@ -6671,8 +6673,13 @@ "ISA_HAS_PREFETCH && TARGET_EXPLICIT_RELOCS" { if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A) - /* Loongson 2[ef] and Loongson 3a use load to $0 to perform prefetching. */ - return "ld\t$0,%a0"; + { + /* Loongson 2[ef] and Loongson 3a use load to $0 for prefetching. */ + if (TARGET_64BIT) + return "ld\t$0,%a0"; + else + return "lw\t$0,%a0"; + } operands[1] = mips_prefetch_cookie (operands[1], operands[2]); return "pref\t%1,%a0"; } diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index 08ab29b1810..0324041dbea 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -205,6 +205,24 @@ mfused-madd Target Report Mask(FUSED_MADD) Generate floating-point multiply-add instructions +mabs= +Target RejectNegative Joined Enum(mips_ieee_754_value) Var(mips_abs) Init(MIPS_IEEE_754_DEFAULT) +-mabs=MODE Select the IEEE 754 ABS/NEG instruction execution mode + +mnan= +Target RejectNegative Joined Enum(mips_ieee_754_value) Var(mips_nan) Init(MIPS_IEEE_754_DEFAULT) +-mnan=ENCODING Select the IEEE 754 NaN data encoding + +Enum +Name(mips_ieee_754_value) Type(int) +Known MIPS IEEE 754 settings (for use with the -mabs= and -mnan= options): + +EnumValue +Enum(mips_ieee_754_value) String(2008) Value(MIPS_IEEE_754_2008) + +EnumValue +Enum(mips_ieee_754_value) String(legacy) Value(MIPS_IEEE_754_LEGACY) + mgp32 Target Report RejectNegative InverseMask(64BIT) Use 32-bit general registers diff --git a/gcc/config/mips/mti-linux.h b/gcc/config/mips/mti-linux.h index 45bc0b88107..96dcac4dfb3 100644 --- a/gcc/config/mips/mti-linux.h +++ b/gcc/config/mips/mti-linux.h @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see /* This target is a multilib target, specify the sysroot paths. */ #undef SYSROOT_SUFFIX_SPEC #define SYSROOT_SUFFIX_SPEC \ - "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mips16:/mips16}%{mmicromips:/micromips}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}" + "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mips16:/mips16}%{mmicromips:/micromips}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}%{mnan=2008:/nan2008}" #undef DRIVER_SELF_SPECS #define DRIVER_SELF_SPECS \ diff --git a/gcc/config/mips/t-mti-elf b/gcc/config/mips/t-mti-elf index bce8f063452..4aec70cb807 100644 --- a/gcc/config/mips/t-mti-elf +++ b/gcc/config/mips/t-mti-elf @@ -19,8 +19,8 @@ # The default build is mips32r2, hard-float big-endian. Add mips32, # soft-float, and little-endian variations. -MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16/mmicromips mabi=64 EL msoft-float/mnan=2008 +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof nan2008 MULTILIB_MATCHES = EL=mel EB=meb # The 64 bit ABI is not supported on the mips32 architecture. 
@@ -36,9 +36,7 @@ MULTILIB_EXCEPTIONS += mabi=64* MULTILIB_EXCEPTIONS += *mips64*/*mips16* MULTILIB_EXCEPTIONS += *mips16/mabi=64* -# We only want micromips for mips32r2 architecture and we do not want -# it used in conjunction with -mips16. -MULTILIB_EXCEPTIONS += *mips16/mmicromips* +# We only want micromips for mips32r2 architecture. MULTILIB_EXCEPTIONS += *mips64*/mmicromips* MULTILIB_EXCEPTIONS += *mips32/mmicromips* MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mips/t-mti-linux b/gcc/config/mips/t-mti-linux index bce8f063452..4aec70cb807 100644 --- a/gcc/config/mips/t-mti-linux +++ b/gcc/config/mips/t-mti-linux @@ -19,8 +19,8 @@ # The default build is mips32r2, hard-float big-endian. Add mips32, # soft-float, and little-endian variations. -MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16/mmicromips mabi=64 EL msoft-float/mnan=2008 +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof nan2008 MULTILIB_MATCHES = EL=mel EB=meb # The 64 bit ABI is not supported on the mips32 architecture. @@ -36,9 +36,7 @@ MULTILIB_EXCEPTIONS += mabi=64* MULTILIB_EXCEPTIONS += *mips64*/*mips16* MULTILIB_EXCEPTIONS += *mips16/mabi=64* -# We only want micromips for mips32r2 architecture and we do not want -# it used in conjunction with -mips16. -MULTILIB_EXCEPTIONS += *mips16/mmicromips* +# We only want micromips for mips32r2 architecture. MULTILIB_EXCEPTIONS += *mips64*/mmicromips* MULTILIB_EXCEPTIONS += *mips32/mmicromips* MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h index c5edc5777a9..2d5e1a8a392 100644 --- a/gcc/config/mmix/mmix.h +++ b/gcc/config/mmix/mmix.h @@ -813,6 +813,10 @@ typedef struct { int regs; int lib; } CUMULATIVE_ARGS; #define NO_IMPLICIT_EXTERN_C +/* mmix-knuth-mmixware target has no support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* These are checked. */ #define DOLLARS_IN_IDENTIFIERS 0 #define NO_DOLLAR_IN_LABEL diff --git a/gcc/config/moxie/uclinux.h b/gcc/config/moxie/uclinux.h index 498037e8072..85c65f257ce 100644 --- a/gcc/config/moxie/uclinux.h +++ b/gcc/config/moxie/uclinux.h @@ -37,3 +37,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --wrap=mmap --wrap=munmap --wrap=alloca\ %{fmudflapth: --wrap=pthread_create\ }} %{fmudflap|fmudflapth: --wrap=main}" + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/netbsd.h b/gcc/config/netbsd.h index 71c9183be0d..dd50dcc0ec4 100644 --- a/gcc/config/netbsd.h +++ b/gcc/config/netbsd.h @@ -139,6 +139,9 @@ along with GCC; see the file COPYING3. If not see #undef LIBGCC_SPEC #define LIBGCC_SPEC NETBSD_LIBGCC_SPEC +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* When building shared libraries, the initialization and finalization functions for the library are .init and .fini respectively. */ diff --git a/gcc/config/openbsd.h b/gcc/config/openbsd.h index 6537451f5f5..0d118b46328 100644 --- a/gcc/config/openbsd.h +++ b/gcc/config/openbsd.h @@ -145,8 +145,10 @@ while (0) #define TARGET_POSIX_IO -/* All new versions of OpenBSD have C99 functions. */ -#define TARGET_C99_FUNCTIONS 1 +/* All new versions of OpenBSD have C99 functions. 
We redefine this hook + so the version from elfos.h header won't be used. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function /* Runtime target specification. */ diff --git a/gcc/config/pa/pa-hpux.h b/gcc/config/pa/pa-hpux.h index c384824fbf6..9685bb25a57 100644 --- a/gcc/config/pa/pa-hpux.h +++ b/gcc/config/pa/pa-hpux.h @@ -114,3 +114,6 @@ along with GCC; see the file COPYING3. If not see compatibility with the HP-UX unwind library. */ #undef TARGET_HPUX_UNWIND_LIBRARY #define TARGET_HPUX_UNWIND_LIBRARY 1 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index be07d2a229a..80c4d43401d 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -833,7 +833,7 @@ (define_insn "scc" [(set (match_operand:SI 0 "register_operand" "=r") (match_operator:SI 3 "comparison_operator" - [(match_operand:SI 1 "register_operand" "r") + [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")]))] "" "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi 1,%0" @@ -843,7 +843,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (match_operator:DI 3 "comparison_operator" - [(match_operand:DI 1 "register_operand" "r") + [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")]))] "TARGET_64BIT" "cmp%I2clr,*%B3 %2,%1,%0\;ldi 1,%0" @@ -853,10 +853,10 @@ (define_insn "iorscc" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (match_operator:SI 3 "comparison_operator" - [(match_operand:SI 1 "register_operand" "r") + [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")]) (match_operator:SI 6 "comparison_operator" - [(match_operand:SI 4 "register_operand" "r") + [(match_operand:SI 4 "reg_or_0_operand" "rM") (match_operand:SI 5 "arith11_operand" "rI")])))] "" "{com%I2clr|cmp%I2clr},%S3 %2,%1,%%r0\;{com%I5clr|cmp%I5clr},%B6 %5,%4,%0\;ldi 1,%0" @@ -866,10 +866,10 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (match_operator:DI 3 "comparison_operator" - [(match_operand:DI 1 "register_operand" "r") + [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")]) (match_operator:DI 6 "comparison_operator" - [(match_operand:DI 4 "register_operand" "r") + [(match_operand:DI 4 "reg_or_0_operand" "rM") (match_operand:DI 5 "arith11_operand" "rI")])))] "TARGET_64BIT" "cmp%I2clr,*%S3 %2,%1,%%r0\;cmp%I5clr,*%B6 %5,%4,%0\;ldi 1,%0" @@ -881,7 +881,7 @@ (define_insn "negscc" [(set (match_operand:SI 0 "register_operand" "=r") (neg:SI (match_operator:SI 3 "comparison_operator" - [(match_operand:SI 1 "register_operand" "r") + [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")])))] "" "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi -1,%0" @@ -891,7 +891,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (neg:DI (match_operator:DI 3 "comparison_operator" - [(match_operand:DI 1 "register_operand" "r") + [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")])))] "TARGET_64BIT" "cmp%I2clr,*%B3 %2,%1,%0\;ldi -1,%0" diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h index d61db4c3bd0..d4bc19a00f1 100644 --- a/gcc/config/pdp11/pdp11.h +++ b/gcc/config/pdp11/pdp11.h @@ -666,3 +666,7 @@ extern rtx cc0_reg_rtx; #define COMPARE_FLAG_MODE HImode #define TARGET_HAVE_NAMED_SECTIONS false + +/* pdp11-unknown-aout target has no 
support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h index d43ec20e440..13414c6cc9c 100644 --- a/gcc/config/picochip/picochip.h +++ b/gcc/config/picochip/picochip.h @@ -656,4 +656,8 @@ enum picochip_builtins not detecting this. */ #define HAVE_AS_LEB128 1 +/* picochip-unknown-none target has no support of C99 runtime */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* The End */ diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c index c2ed7389bc4..d7cacc16352 100644 --- a/gcc/config/rl78/rl78.c +++ b/gcc/config/rl78/rl78.c @@ -49,6 +49,7 @@ #include "rl78-protos.h" #include "dumpfile.h" #include "tree-pass.h" +#include "context.h" static inline bool is_interrupt_func (const_tree decl); static inline bool is_brk_interrupt_func (const_tree decl); @@ -129,30 +130,45 @@ devirt_pass (void) /* This pass converts virtual instructions using virtual registers, to real instructions using real registers. Rather than run it as reorg, we reschedule it before vartrack to help with debugging. */ -static struct opt_pass rl78_devirt_pass = -{ - RTL_PASS, - "devirt", - OPTGROUP_NONE, /* optinfo_flags */ - devirt_gate, - devirt_pass, - NULL, - NULL, - 212, - TV_MACH_DEP, - 0, 0, 0, - 0, - 0 +namespace { + +const pass_data pass_data_rl78_devirt = +{ + RTL_PASS, /* type */ + "devirt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ }; -static struct register_pass_info rl78_devirt_info = +class pass_rl78_devirt : public rtl_opt_pass { - & rl78_devirt_pass, - "vartrack", - 1, - PASS_POS_INSERT_BEFORE +public: + pass_rl78_devirt(gcc::context *ctxt) + : rtl_opt_pass(pass_data_rl78_devirt, ctxt) + { + } + + /* opt_pass methods: */ + bool gate () { return devirt_gate (); } + unsigned int execute () { return devirt_pass (); } }; +} // anon namespace + +rtl_opt_pass * +make_pass_rl78_devirt (gcc::context *ctxt) +{ + return new pass_rl78_devirt (ctxt); +} + + #undef TARGET_ASM_FILE_START #define TARGET_ASM_FILE_START rl78_asm_file_start @@ -167,6 +183,15 @@ rl78_asm_file_start (void) fprintf (asm_out_file, "r%d\t=\t0x%x\n", 16 + i, 0xffee8 + i); } + opt_pass *rl78_devirt_pass = make_pass_rl78_devirt (g); + struct register_pass_info rl78_devirt_info = + { + rl78_devirt_pass, + "vartrack", + 1, + PASS_POS_INSERT_BEFORE + }; + register_pass (& rl78_devirt_info); } diff --git a/gcc/config/rs6000/aix43.h b/gcc/config/rs6000/aix43.h index 70db7f7482f..b27c046021a 100644 --- a/gcc/config/rs6000/aix43.h +++ b/gcc/config/rs6000/aix43.h @@ -159,3 +159,6 @@ do { \ #define TARGET_USES_AIX64_OPT 1 #define TARGET_AIX_VERSION 43 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/rs6000/aix51.h b/gcc/config/rs6000/aix51.h index 669dbbe03f3..3837bfdc0bb 100644 --- a/gcc/config/rs6000/aix51.h +++ b/gcc/config/rs6000/aix51.h @@ -163,3 +163,6 @@ do { \ #define TARGET_USE_JCR_SECTION 0 #define TARGET_AIX_VERSION 51 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/rs6000/aix52.h b/gcc/config/rs6000/aix52.h index c57271a5a58..51954718b2e 100644 --- a/gcc/config/rs6000/aix52.h +++ 
b/gcc/config/rs6000/aix52.h @@ -166,10 +166,6 @@ do { \ #undef LD_INIT_SWITCH #define LD_INIT_SWITCH "-binitfini" -/* AIX 5.2 has the float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #ifndef _AIX52 extern long long int atoll(const char *); #endif diff --git a/gcc/config/rs6000/aix53.h b/gcc/config/rs6000/aix53.h index b1b0759e7ff..b3bd73a6988 100644 --- a/gcc/config/rs6000/aix53.h +++ b/gcc/config/rs6000/aix53.h @@ -166,10 +166,6 @@ do { \ #undef LD_INIT_SWITCH #define LD_INIT_SWITCH "-binitfini" -/* AIX 5.2 has the float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #ifndef _AIX52 extern long long int atoll(const char *); #endif diff --git a/gcc/config/rs6000/aix61.h b/gcc/config/rs6000/aix61.h index cd341b97eea..b0778143773 100644 --- a/gcc/config/rs6000/aix61.h +++ b/gcc/config/rs6000/aix61.h @@ -190,10 +190,6 @@ do { \ #undef LD_INIT_SWITCH #define LD_INIT_SWITCH "-binitfini" -/* AIX 5.2 has the float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #ifndef _AIX52 extern long long int atoll(const char *); #endif diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h index 0cf2f4c346d..d5919c4c71d 100644 --- a/gcc/config/rs6000/darwin.h +++ b/gcc/config/rs6000/darwin.h @@ -386,10 +386,8 @@ extern int darwin_emit_branch_islands; #define OFFS_ASSIGNIVAR_FAST 0xFFFEFEC0 /* Old versions of Mac OS/Darwin don't have C99 functions available. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS \ - (TARGET_64BIT \ - || strverscmp (darwin_macosx_version_min, "10.3") >= 0) +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION darwin_libc_has_function /* When generating kernel code or kexts, we don't use Altivec by default, as kernel code doesn't save/restore those registers. 
*/ diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 052ac482e0f..9a846239b04 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -132,11 +132,14 @@ "") (define_insn "*negtd2_fpr" - [(set (match_operand:TD 0 "gpc_reg_operand" "=d") - (neg:TD (match_operand:TD 1 "gpc_reg_operand" "d")))] + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (neg:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))] "TARGET_HARD_FLOAT && TARGET_FPRS" - "fneg %0,%1" - [(set_attr "type" "fp")]) + "@ + fneg %0,%1 + fneg %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fp") + (set_attr "length" "4,8")]) (define_expand "abstd2" [(set (match_operand:TD 0 "gpc_reg_operand" "") @@ -145,18 +148,24 @@ "") (define_insn "*abstd2_fpr" - [(set (match_operand:TD 0 "gpc_reg_operand" "=d") - (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d")))] + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))] "TARGET_HARD_FLOAT && TARGET_FPRS" - "fabs %0,%1" - [(set_attr "type" "fp")]) + "@ + fabs %0,%1 + fabs %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fp") + (set_attr "length" "4,8")]) (define_insn "*nabstd2_fpr" - [(set (match_operand:TD 0 "gpc_reg_operand" "=d") - (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d"))))] + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d"))))] "TARGET_HARD_FLOAT && TARGET_FPRS" - "fnabs %0,%1" - [(set_attr "type" "fp")]) + "@ + fnabs %0,%1 + fnabs %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fp") + (set_attr "length" "4,8")]) ;; Hardware support for decimal floating point operations. diff --git a/gcc/config/rs6000/linux.h b/gcc/config/rs6000/linux.h index f7f2d80c4f2..2e5a56b3929 100644 --- a/gcc/config/rs6000/linux.h +++ b/gcc/config/rs6000/linux.h @@ -28,17 +28,18 @@ #ifdef SINGLE_LIBC #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) #else #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) #endif -/* glibc has float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. */ -#undef TARGET_HAS_SINCOS -#define TARGET_HAS_SINCOS (OPTION_GLIBC) +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function #undef TARGET_OS_CPP_BUILTINS #define TARGET_OS_CPP_BUILTINS() \ diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 79f0f0b5f00..439f53f2d23 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -288,17 +288,18 @@ extern int dot_symbols; #ifdef SINGLE_LIBC #define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) #else #define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) #endif -/* glibc has float and long double forms of math functions. */ -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) - -/* Whether we have sincos that follows the GNU extension. 
*/ -#undef TARGET_HAS_SINCOS -#define TARGET_HAS_SINCOS (OPTION_GLIBC) +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_android_libc_has_function #undef TARGET_OS_CPP_BUILTINS #define TARGET_OS_CPP_BUILTINS() \ diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 18912f15a4a..7338e764c5c 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1702,3 +1702,99 @@ return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL; }) + +;; Match the first insn (addis) in fusing the combination of addis and loads to +;; GPR registers on power8. +(define_predicate "fusion_gpr_addis" + (match_code "const_int,high,plus") +{ + HOST_WIDE_INT value; + rtx int_const; + + if (GET_CODE (op) == HIGH) + return 1; + + if (CONST_INT_P (op)) + int_const = op; + + else if (GET_CODE (op) == PLUS + && base_reg_operand (XEXP (op, 0), Pmode) + && CONST_INT_P (XEXP (op, 1))) + int_const = XEXP (op, 1); + + else + return 0; + + /* Power8 currently will only do the fusion if the top 11 bits of the addis + value are all 1's or 0's. */ + value = INTVAL (int_const); + if ((value & (HOST_WIDE_INT)0xffff) != 0) + return 0; + + if ((value & (HOST_WIDE_INT)0xffff0000) == 0) + return 0; + + return (IN_RANGE (value >> 16, -32, 31)); +}) + +;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis +;; and loads to GPR registers on power8. +(define_predicate "fusion_gpr_mem_load" + (match_code "mem,sign_extend,zero_extend") +{ + rtx addr; + + /* Handle sign/zero extend. */ + if (GET_CODE (op) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND)) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) == PLUS) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + return (base_reg_operand (base, GET_MODE (base)) + && satisfies_constraint_I (offset)); + } + + else if (GET_CODE (addr) == LO_SUM) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 54548be7038..5124e1665d4 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -42,5 +42,7 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ -/* Replacement for TImode that only is allowed in GPRs. */ +/* Replacement for TImode that only is allowed in GPRs. We also use PTImode + for quad memory atomic operations to force getting an even/odd register + combination. 
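The constant check in fusion_gpr_addis above is easier to follow with concrete numbers: the low halfword of the addis result must be zero, the high halfword must be non-zero, and the 16-bit addis immediate must have its top 11 bits all zeros or all ones (i.e. lie in the range [-32, 31]).  A standalone restatement, for illustration only — it is not the predicate itself:

    /* Mirror of the fusion_gpr_addis constant test.  */
    static bool
    addis_value_fusable_p (long long value)
    {
      if ((value & 0xffffLL) != 0)          /* low halfword must be zero,   */
        return false;
      if ((value & 0xffff0000LL) == 0)      /* the high halfword non-zero,  */
        return false;
      long long imm = value >> 16;          /* and the addis immediate must */
      return imm >= -32 && imm <= 31;       /* fit in 6 signed bits.        */
    }

    /* Examples: 0x001f0000 -> true  (immediate 31)
                 0x12340000 -> false (top 11 bits of the value not uniform)
                 0x00010004 -> false (low halfword non-zero).  */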
*/ PARTIAL_INT_MODE (TI); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 3a7b37a8270..3ddabb81c39 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -73,6 +73,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); extern bool quad_load_store_p (rtx, rtx); +extern bool fusion_gpr_load_p (rtx *, bool); +extern void expand_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx *); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8b939d8e826..c1acbd825ea 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -284,9 +284,6 @@ static struct { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) }, }; -/* 2 argument gen function typedef. */ -typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx); - /* Pointer to function (in rs6000-c.c) that can define or undefine target macros that have changed. Languages that don't support the preprocessor don't link in rs6000-c.c, so we can't call it directly. */ @@ -3074,6 +3071,21 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; } + /* Enable power8 fusion if we are tuning for power8, even if we aren't + generating power8 instructions. */ + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) + rs6000_isa_flags |= (processor_target_table[tune_index].target_enable + & OPTION_MASK_P8_FUSION); + + /* Power8 does not fuse sign extended loads with the addis. If we are + optimizing at high levels for speed, convert a sign extended load into a + zero extending load, and an explicit sign extension. */ + if (TARGET_P8_FUSION + && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) + && optimize_function_for_speed_p (cfun) + && optimize >= 3) + rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -6918,9 +6930,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode, && GET_CODE (XEXP (x, 1)) == CONST_INT && reg_offset_p && !SPE_VECTOR_MODE (mode) - && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode - || mode == DDmode || mode == TDmode - || mode == DImode)) + && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))) { HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); @@ -8329,8 +8339,8 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type) || (type && TREE_CODE (type) == VECTOR_TYPE && int_size_in_bytes (type) >= 16)) return 128; - else if (TARGET_MACHO - && rs6000_darwin64_abi + else if (((TARGET_MACHO && rs6000_darwin64_abi) + || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) && mode == BLKmode && type && TYPE_ALIGN (type) > 64) return 128; @@ -9878,8 +9888,9 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, We don't need to check for pass-by-reference because of the test above. We can return a simplifed answer, since we know there's no offset to add. 
*/ - if (TARGET_MACHO - && rs6000_darwin64_abi + if (((TARGET_MACHO + && rs6000_darwin64_abi) + || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) && integer_zerop (TYPE_SIZE (type))) { unsigned HOST_WIDE_INT align, boundary; @@ -11133,9 +11144,6 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp) switch (nopnds) { - case 0: - pat = GEN_FCN (icode) (NULL_RTX); - break; case 1: pat = GEN_FCN (icode) (op[0]); break; @@ -21401,8 +21409,7 @@ rs6000_emit_prologue (void) HOST_WIDE_INT offset; if (!(strategy & SAVE_INLINE_GPRS)) - ool_adjust = 8 * (info->first_gp_reg_save - - (FIRST_SAVRES_REGISTER + 1)); + ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); offset = info->spe_gp_save_offset + frame_off - ool_adjust; spe_save_area_ptr = gen_rtx_REG (Pmode, 11); save_off = frame_off - offset; @@ -22644,8 +22651,7 @@ rs6000_emit_epilogue (int sibcall) anew to every function. */ if (!restoring_GPRs_inline) - ool_adjust = 8 * (info->first_gp_reg_save - - (FIRST_SAVRES_REGISTER + 1)); + ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); frame_reg_rtx = gen_rtx_REG (Pmode, 11); emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx, GEN_INT (info->spe_gp_save_offset @@ -28127,7 +28133,7 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) passes++; enum insn_code code = optab_handler (smul_optab, mode); - gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (code); + insn_gen_fn gen_mul = GEN_FCN (code); gcc_assert (code != CODE_FOR_nothing); @@ -28205,7 +28211,7 @@ rs6000_emit_swrsqrt (rtx dst, rtx src) int i; rtx halfthree; enum insn_code code = optab_handler (smul_optab, mode); - gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (code); + insn_gen_fn gen_mul = GEN_FCN (code); gcc_assert (code != CODE_FOR_nothing); @@ -30419,6 +30425,382 @@ rs6000_split_logical (rtx operands[3], } +/* Return true if the peephole2 can combine a load involving a combination of + an addis instruction and a load with an offset that can be fused together on + a power8. + + The operands are: + operands[0] register set with addis + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. + + In addition, we are passed a boolean that is true if this is a peephole2, + and we can use see if the addis_reg is dead after the insn and can be + replaced by the target register. */ + +bool +fusion_gpr_load_p (rtx *operands, bool peep2_p) +{ + rtx addis_reg = operands[0]; + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx mem = operands[3]; + rtx addr; + rtx base_reg; + + /* Validate arguments. */ + if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) + return false; + + if (!base_reg_operand (target, GET_MODE (target))) + return false; + + if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) + return false; + + if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) + return false; + + /* Allow sign/zero extension. */ + if (GET_CODE (mem) == ZERO_EXTEND + || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) + mem = XEXP (mem, 0); + + if (!MEM_P (mem)) + return false; + + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return false; + + /* Validate that the register used to load the high value is either the + register being loaded, or we can safely replace its use in a peephole2. 
+ + If this is a peephole2, we assume that there are 2 instructions in the + peephole (addis and load), so we want to check if the target register was + not used in the memory address and the register to hold the addis result + is dead after the peephole. */ + if (REGNO (addis_reg) != REGNO (target)) + { + if (!peep2_p) + return false; + + if (reg_mentioned_p (target, mem)) + return false; + + if (!peep2_reg_dead_p (2, addis_reg)) + return false; + } + + base_reg = XEXP (addr, 0); + return REGNO (addis_reg) == REGNO (base_reg); +} + +/* During the peephole2 pass, adjust and expand the insns for a load fusion + sequence. We adjust the addis register to use the target register. If the + load sign extends, we adjust the code to do the zero extending load, and an + explicit sign extension later since the fusion only covers zero extending + loads. + + The operands are: + operands[0] register set with addis (to be replaced with target) + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. */ + +void +expand_fusion_gpr_load (rtx *operands) +{ + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx orig_mem = operands[3]; + rtx new_addr, new_mem, orig_addr, offset; + enum rtx_code plus_or_lo_sum; + enum machine_mode target_mode = GET_MODE (target); + enum machine_mode extend_mode = target_mode; + enum machine_mode ptr_mode = Pmode; + enum rtx_code extend = UNKNOWN; + rtx addis_reg = ((ptr_mode == target_mode) + ? target + : simplify_subreg (ptr_mode, target, target_mode, 0)); + + if (GET_CODE (orig_mem) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) + { + extend = GET_CODE (orig_mem); + orig_mem = XEXP (orig_mem, 0); + target_mode = GET_MODE (orig_mem); + } + + gcc_assert (MEM_P (orig_mem)); + + orig_addr = XEXP (orig_mem, 0); + plus_or_lo_sum = GET_CODE (orig_addr); + gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); + + offset = XEXP (orig_addr, 1); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset); + new_mem = change_address (orig_mem, target_mode, new_addr); + + if (extend != UNKNOWN) + new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); + + emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value)); + emit_insn (gen_rtx_SET (VOIDmode, target, new_mem)); + + if (extend == SIGN_EXTEND) + { + int sub_off = ((BYTES_BIG_ENDIAN) + ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode) + : 0); + rtx sign_reg + = simplify_subreg (target_mode, target, extend_mode, sub_off); + + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_SIGN_EXTEND (extend_mode, sign_reg))); + } + + return; +} + +/* Return a string to fuse an addis instruction with a gpr load to the same + register that we loaded up the addis instruction. The code is complicated, + so we call output_asm_insn directly, and just return "". + + The operands are: + operands[0] register set with addis (must be same reg as target). + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. 
*/ + +const char * +emit_fusion_gpr_load (rtx *operands) +{ + rtx addis_reg = operands[0]; + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx mem = operands[3]; + rtx fuse_ops[10]; + rtx addr; + rtx load_offset; + const char *addis_str = NULL; + const char *load_str = NULL; + const char *extend_insn = NULL; + const char *mode_name = NULL; + char insn_template[80]; + enum machine_mode mode; + const char *comment_str = ASM_COMMENT_START; + bool sign_p = false; + + gcc_assert (REG_P (addis_reg) && REG_P (target)); + gcc_assert (REGNO (addis_reg) == REGNO (target)); + + if (*comment_str == ' ') + comment_str++; + + /* Allow sign/zero extension. */ + if (GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN) + { + sign_p = true; + mem = XEXP (mem, 0); + } + + gcc_assert (MEM_P (mem)); + addr = XEXP (mem, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + gcc_unreachable (); + + load_offset = XEXP (addr, 1); + + /* Now emit the load instruction to the same register. */ + mode = GET_MODE (mem); + switch (mode) + { + case QImode: + mode_name = "char"; + load_str = "lbz"; + extend_insn = "extsb %0,%0"; + break; + + case HImode: + mode_name = "short"; + load_str = "lhz"; + extend_insn = "extsh %0,%0"; + break; + + case SImode: + mode_name = "int"; + load_str = "lwz"; + extend_insn = "extsw %0,%0"; + break; + + case DImode: + if (TARGET_POWERPC64) + { + mode_name = "long"; + load_str = "ld"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + /* Emit the addis instruction. */ + fuse_ops[0] = target; + if (satisfies_constraint_L (addis_value)) + { + fuse_ops[1] = addis_value; + addis_str = "lis %0,%v1"; + } + + else if (GET_CODE (addis_value) == PLUS) + { + rtx op0 = XEXP (addis_value, 0); + rtx op1 = XEXP (addis_value, 1); + + if (REG_P (op0) && CONST_INT_P (op1) + && satisfies_constraint_L (op1)) + { + fuse_ops[1] = op0; + fuse_ops[2] = op1; + addis_str = "addis %0,%1,%v2"; + } + } + + else if (GET_CODE (addis_value) == HIGH) + { + rtx value = XEXP (addis_value, 0); + if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) + { + fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */ + if (TARGET_ELF) + addis_str = "addis %0,%2,%1@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%1@u(%2)"; + + else + gcc_unreachable (); + } + + else if (GET_CODE (value) == PLUS) + { + rtx op0 = XEXP (value, 0); + rtx op1 = XEXP (value, 1); + + if (GET_CODE (op0) == UNSPEC + && XINT (op0, 1) == UNSPEC_TOCREL + && CONST_INT_P (op1)) + { + fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */ + fuse_ops[3] = op1; + if (TARGET_ELF) + addis_str = "addis %0,%2,%1+%3@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%1+%3@u(%2)"; + + else + gcc_unreachable (); + } + } + + else if (satisfies_constraint_L (value)) + { + fuse_ops[1] = value; + addis_str = "lis %0,%v1"; + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) + { + fuse_ops[1] = value; + addis_str = "lis %0,%1@ha"; + } + } + + if (!addis_str) + fatal_insn ("Could not generate addis value for fusion", addis_value); + + sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str, + comment_str, mode_name); + output_asm_insn (insn_template, fuse_ops); + + /* Emit the D-form load instruction. 
*/ + if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset)) + { + sprintf (insn_template, "%s %%0,%%1(%%0)", load_str); + fuse_ops[1] = load_offset; + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (load_offset) == UNSPEC + && XINT (load_offset, 1) == UNSPEC_TOCREL) + { + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str); + + else + gcc_unreachable (); + + fuse_ops[1] = XVECEXP (load_offset, 0, 0); + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (load_offset) == PLUS + && GET_CODE (XEXP (load_offset, 0)) == UNSPEC + && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL + && CONST_INT_P (XEXP (load_offset, 1))) + { + rtx tocrel_unspec = XEXP (load_offset, 0); + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str); + + else + gcc_unreachable (); + + fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0); + fuse_ops[2] = XEXP (load_offset, 1); + output_asm_insn (insn_template, fuse_ops); + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset)) + { + sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str); + + fuse_ops[1] = load_offset; + output_asm_insn (insn_template, fuse_ops); + } + + else + fatal_insn ("Unable to generate load offset for fusion", load_offset); + + /* Handle sign extension. The peephole2 pass generates this as a separate + insn, but we handle it just in case it got reattached. */ + if (sign_p) + { + gcc_assert (extend_insn != NULL); + output_asm_insn (extend_insn, fuse_ops); + } + + return ""; +} + + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rs6000.h" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index e5a6abd6d0d..a5a7a859426 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1498,7 +1498,8 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; On the RS/6000, we grow upwards, from the area after the outgoing arguments. */ -#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 || flag_asan != 0) +#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \ + || (flag_sanitize & SANITIZE_ADDRESS) != 0) /* Size of the outgoing register save area */ #define RS6000_REG_SAVE ((DEFAULT_ABI == ABI_AIX \ @@ -2138,9 +2139,15 @@ extern int toc_initialized; } \ else if (TARGET_XCOFF) \ { \ - fputs ("\t.lglobl\t.", FILE); \ - RS6000_OUTPUT_BASENAME (FILE, alias); \ - putc ('\n', FILE); \ + if (!RS6000_WEAK || !DECL_WEAK (DECL)) \ + { \ + fputs ("\t.lglobl\t.", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + putc ('\n', FILE); \ + fputs ("\t.lglobl\t", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + putc ('\n', FILE); \ + } \ } \ fputs ("\t.set\t.", FILE); \ RS6000_OUTPUT_BASENAME (FILE, alias); \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 064a51da608..3880f9175a2 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -136,7 +136,6 @@ UNSPEC_P8V_MTVSRD UNSPEC_P8V_XXPERMDI UNSPEC_P8V_RELOAD_FROM_VSX - UNSPEC_FUSION_GPR ]) ;; @@ -15757,7 +15756,8 @@ "cmpw %2,%L0,%1\;" "bne- %2,$-16"; } -}) +} + [(set_attr "length" "20")]) (define_insn "rs6000_mftb_<mode>" [(set (match_operand:P 0 "gpc_reg_operand" "=r") @@ -15771,6 +15771,43 @@ }) +;; Power8 fusion support for fusing an addis instruction with a D-form load of +;; a GPR. 
The addis instruction must be adjacent to the load, and use the same +;; register that is being loaded. The fused ops must be physically adjacent. + +;; We use define_peephole for the actual addis/load, and the register used to +;; hold the addis value must be the same as the register being loaded. We use +;; define_peephole2 to change the register used for addis to be the register +;; being loaded, since we can look at whether it is dead after the load insn. + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:INT1 2 "base_reg_operand" "") + (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] + "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)" +{ + return emit_fusion_gpr_load (operands); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +(define_peephole2 + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:INT1 2 "base_reg_operand" "") + (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] + "TARGET_P8_FUSION + && (REGNO (operands[0]) != REGNO (operands[2]) + || GET_CODE (operands[3]) == SIGN_EXTEND) + && fusion_gpr_load_p (operands, true)" + [(const_int 0)] +{ + expand_fusion_gpr_load (operands); + DONE; +}) + + (include "sync.md") (include "vector.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index f36e4758031..cd83cb2d206 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -546,3 +546,7 @@ Use ISA 2.07 transactional memory (HTM) instructions mquad-memory Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags) Generate the quad word memory instructions (lq/stq/lqarx/stqcx). + +mcompat-align-parm +Target Report Var(rs6000_compat_align_parm) Init(0) Save +Generate aggregate parameter passing code with at most 64-bit alignment. diff --git a/gcc/config/rs6000/rtems.h b/gcc/config/rs6000/rtems.h index b910b5ec5a2..fb22be1e8bb 100644 --- a/gcc/config/rs6000/rtems.h +++ b/gcc/config/rs6000/rtems.h @@ -34,6 +34,9 @@ } \ while (0) +#undef TARGET_LIBGCC_SDATA_SECTION +#define TARGET_LIBGCC_SDATA_SECTION ".sdata" + #undef CPP_OS_DEFAULT_SPEC #define CPP_OS_DEFAULT_SPEC "%(cpp_os_rtems)" diff --git a/gcc/config/rs6000/t-linux64 b/gcc/config/rs6000/t-linux64 index 9175de2ffe3..70e928dd7cd 100644 --- a/gcc/config/rs6000/t-linux64 +++ b/gcc/config/rs6000/t-linux64 @@ -25,8 +25,8 @@ # it doesn't tell anything about the 32bit libraries on those systems. Set # MULTILIB_OSDIRNAMES according to what is found on the target. 
-MULTILIB_OPTIONS = m64/m32 -MULTILIB_DIRNAMES = 64 32 -MULTILIB_EXTRA_OPTS = fPIC -MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:powerpc64-linux-gnu) -MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) +MULTILIB_OPTIONS := m64/m32 +MULTILIB_DIRNAMES := 64 32 +MULTILIB_EXTRA_OPTS := +MULTILIB_OSDIRNAMES := m64=../lib64$(call if_multiarch,:powerpc64-linux-gnu) +MULTILIB_OSDIRNAMES += m32=$(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) diff --git a/gcc/config/rs6000/t-linux64bele b/gcc/config/rs6000/t-linux64bele new file mode 100644 index 00000000000..97c1ee6fb4d --- /dev/null +++ b/gcc/config/rs6000/t-linux64bele @@ -0,0 +1,7 @@ +#rs6000/t-linux64end + +MULTILIB_OPTIONS += mlittle +MULTILIB_DIRNAMES += le +MULTILIB_OSDIRNAMES += $(subst =,.mlittle=,$(subst lible32,lib32le,$(subst lible64,lib64le,$(subst lib,lible,$(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES)))))) +MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mlittle%,$(MULTILIB_OSDIRNAMES))) +MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN} diff --git a/gcc/config/rs6000/t-linux64le b/gcc/config/rs6000/t-linux64le new file mode 100644 index 00000000000..0cf38e1523a --- /dev/null +++ b/gcc/config/rs6000/t-linux64le @@ -0,0 +1,3 @@ +#rs6000/t-linux64le + +MULTILIB_OSDIRNAMES := $(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES)) diff --git a/gcc/config/rs6000/t-linux64lebe b/gcc/config/rs6000/t-linux64lebe new file mode 100644 index 00000000000..2e63bdb9fc9 --- /dev/null +++ b/gcc/config/rs6000/t-linux64lebe @@ -0,0 +1,7 @@ +#rs6000/t-linux64leend + +MULTILIB_OPTIONS += mbig +MULTILIB_DIRNAMES += be +MULTILIB_OSDIRNAMES += $(subst =,.mbig=,$(subst libbe32,lib32be,$(subst libbe64,lib64be,$(subst lib,libbe,$(subst le-linux,-linux,$(MULTILIB_OSDIRNAMES)))))) +MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mbig%,$(MULTILIB_OSDIRNAMES))) +MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN} diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 5e6f397031c..11d6b8bb4d0 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -40,6 +40,14 @@ ;; it to use gprs as well as vsx registers. (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VSX_M2 [V16QI + V8HI + V4SI + V2DI + V4SF + V2DF + (TI "TARGET_VSX_TIMODE")]) + ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") @@ -1446,3 +1454,27 @@ }" [(set_attr "length" "20") (set_attr "type" "veccomplex")]) + + +;; Power8 Vector fusion. The fused ops must be physically adjacent. 
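A minimal sketch of the kind of source the vector-fusion peepholes below are aimed at, assuming a powerpc64 target built with -mcpu=power8; the file name, option set, and register numbers in the comments are illustrative guesses, not output these patterns guarantee:

/* A VSX load whose constant offset has to be materialized in a GPR, since
   lxvd2x and friends are indexed-only.  Compiled with something like
       gcc -O2 -mcpu=power8 -S vec-load.c
   this is expected to come out as an adjacent pair along the lines of
       li 9,48
       lxvd2x 34,3,9
   which is the "li ... ; lx<VSm>x ..." shape the peepholes below fuse.  */
typedef double v2df __attribute__ ((vector_size (16)));

v2df
load_fourth (const v2df *p)
{
  return p[3];   /* base register plus constant offset 48 */
}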
+(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_dup 0) + (match_operand:P 3 "int_reg_operand" ""))))] + "TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "") + (match_dup 0))))] + "TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index b163a593721..603e49de3ab 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -1021,9 +1021,9 @@ rx_gen_move_template (rtx * operands, bool is_movu) gcc_assert (! is_movu); if (REG_P (src) && REG_P (dest) && (REGNO (dest) == REGNO (src) + 1)) - sprintf (out_template, "mov.L\t%H1, %H0 | mov.L\t%1, %0"); + sprintf (out_template, "mov.L\t%%H1, %%H0 ! mov.L\t%%1, %%0"); else - sprintf (out_template, "mov.L\t%1, %0 | mov.L\t%H1, %H0"); + sprintf (out_template, "mov.L\t%%1, %%0 ! mov.L\t%%H1, %%H0"); } else sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov", @@ -3270,7 +3270,7 @@ rx_ok_to_inline (tree caller, tree callee) static bool rx_enable_lra (void) { - return TARGET_ENABLE_LRA || 1; + return TARGET_ENABLE_LRA; } diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index 72aee2fe214..ec2770be161 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -99,6 +99,7 @@ %{mpid} \ %{mint-register=*} \ %{mgcc-abi:-mgcc-abi} %{!mgcc-abi:-mrx-abi} \ +%{mcpu=*} \ " #undef LIB_SPEC diff --git a/gcc/config/s390/2827.md b/gcc/config/s390/2827.md index f21d8f8ce25..5be7cfaabfb 100644 --- a/gcc/config/s390/2827.md +++ b/gcc/config/s390/2827.md @@ -32,12 +32,12 @@ (const_int 0))) (define_attr "ooo_groupalone" "" - (cond [(eq_attr "mnemonic" "lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr") (const_int 1)] + (cond [(eq_attr "mnemonic" "lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr,bcr_flush") (const_int 1)] (const_int 0))) (define_insn_reservation "zEC12_simple" 1 (and (eq_attr "cpu" "zEC12") - (eq_attr "mnemonic" 
"ltg,ogrk,lr,lnebr,lghrl,sdbr,x,asi,lhr,sebr,madb,ar,lhrl,clfxtr,llgfr,clghrl,cgr,cli,agrk,ic,adbr,aebr,lrv,clg,cy,cghi,sy,celfbr,seb,clgfr,al,tm,lang,clfebr,lghr,cdb,lpebr,laa,ark,lh,or,icy,xi,msebr,n,llihl,afi,cs,nrk,sth,lgr,l,lcr,stey,xg,crt,slgfr,ny,ld,j,llihh,slgr,clfhsi,slg,lb,lgrl,lrl,llihf,lndbr,llcr,laxg,mvghi,rllg,sdb,xrk,laag,alhsik,algfi,algr,aly,agfi,lrvr,d,crl,llgc,tmhl,algsi,lgh,icmh,clhrl,xgrk,icm,iilf,ork,lbr,cg,ldgr,lgf,iihf,llghr,sg,clfdbr,llgtr,stam,cebr,tmhh,tceb,slgf,basr,lgbr,maebr,lgb,cgfi,aeb,ltebr,lax,clfit,lrvgr,nihl,ni,clfdtr,srdl,mdb,srk,xihf,stgrl,sthrl,algf,ltr,cdlgbr,cgit,ng,lat,llghrl,ltgr,nihh,clgfrl,srlk,maeb,agr,cxlftr,ler,bcr,stcy,cds,clfi,nihf,ly,clt,lgat,alg,lhy,lgfrl,clghsi,clrt,tmll,srlg,tcdb,ay,sty,clr,lgfi,lan,lpdbr,clgt,adb,ahik,sra,algrk,cdfbr,lcebr,clfxbr,msdbr,ceb,clgr,tmy,tmlh,alghsik,lcgr,mvi,cdbr,ltgf,xr,larl,ldr,llgcr,clgrt,clrl,cghsi,cliy,madbr,oy,ogr,llgt,meebr,slr,clgxbr,chi,s,icmy,llc,ngr,clhhsi,ltgfr,llill,lhi,o,meeb,clgdtr,sll,clgrl,clgf,ledbr,cegbr,mviy,algfr,rll,cdlftr,sldl,cdlgtr,lg,niy,st,sgr,ag,le,xgr,cr,stg,llilh,sr,lzer,cdsg,sllk,mdbr,stoc,csg,clgit,chhsi,strl,llilf,lndfr,ngrk,clgebr,clgfi,llgh,mseb,ltdbr,oill,la,llhrl,stc,lghi,oihl,xiy,sllg,llgf,cgrt,ldeb,cl,sl,cdlfbr,oi,oilh,nr,srak,oihh,ear,slgrk,og,c,slgfi,sthy,oilf,oiy,msdb,oihf,a,cfi,lzxr,lzdr,srag,cdgbr,brasl,alr,cgrl,llgfrl,cit,clgxtr,ley,exrl,lcdfr,lay,xilf,lcdbr,alsi,mvhhi,srl,chsi,lgfr,lrvg,cly,sgrk,ahi,celgbr,nill,clgdbr,jg,slrk,lxr,sar,slfi,cpsdr,lcgfr,aghik,nilh,mvhi,lpdfr,xy,alrk,lao,agsi,ldy,nilf,llhr,alfi,laog,sly,aghi,ldebr,bras,srda,cefbr,lt")) "nothing") + (eq_attr "mnemonic" "ltg,ogrk,lr,lnebr,lghrl,sdbr,x,asi,lhr,sebr,madb,ar,lhrl,clfxtr,llgfr,clghrl,cgr,cli,agrk,ic,adbr,aebr,lrv,clg,cy,cghi,sy,celfbr,seb,clgfr,al,tm,lang,clfebr,lghr,cdb,lpebr,laa,ark,lh,or,icy,xi,msebr,n,llihl,afi,cs,nrk,sth,lgr,l,lcr,stey,xg,crt,slgfr,ny,ld,j,llihh,slgr,clfhsi,slg,lb,lgrl,lrl,llihf,lndbr,llcr,laxg,mvghi,rllg,sdb,xrk,laag,alhsik,algfi,algr,aly,agfi,lrvr,d,crl,llgc,tmhl,algsi,lgh,icmh,clhrl,xgrk,icm,iilf,ork,lbr,cg,ldgr,lgf,iihf,llghr,sg,clfdbr,llgtr,stam,cebr,tmhh,tceb,slgf,basr,lgbr,maebr,lgb,cgfi,aeb,ltebr,lax,clfit,lrvgr,nihl,ni,clfdtr,srdl,mdb,srk,xihf,stgrl,sthrl,algf,ltr,cdlgbr,cgit,ng,lat,llghrl,ltgr,nihh,clgfrl,srlk,maeb,agr,cxlftr,ler,bcr_flush,stcy,cds,clfi,nihf,ly,clt,lgat,alg,lhy,lgfrl,clghsi,clrt,tmll,srlg,tcdb,ay,sty,clr,lgfi,lan,lpdbr,clgt,adb,ahik,sra,algrk,cdfbr,lcebr,clfxbr,msdbr,ceb,clgr,tmy,tmlh,alghsik,lcgr,mvi,cdbr,ltgf,xr,larl,ldr,llgcr,clgrt,clrl,cghsi,cliy,madbr,oy,ogr,llgt,meebr,slr,clgxbr,chi,s,icmy,llc,ngr,clhhsi,ltgfr,llill,lhi,o,meeb,clgdtr,sll,clgrl,clgf,ledbr,cegbr,mviy,algfr,rll,cdlftr,sldl,cdlgtr,lg,niy,st,sgr,ag,le,xgr,cr,stg,llilh,sr,lzer,cdsg,sllk,mdbr,stoc,csg,clgit,chhsi,strl,llilf,lndfr,ngrk,clgebr,clgfi,llgh,mseb,ltdbr,oill,la,llhrl,stc,lghi,oihl,xiy,sllg,llgf,cgrt,ldeb,cl,sl,cdlfbr,oi,oilh,nr,srak,oihh,ear,slgrk,og,c,slgfi,sthy,oilf,oiy,msdb,oihf,a,cfi,lzxr,lzdr,srag,cdgbr,brasl,alr,cgrl,llgfrl,cit,clgxtr,ley,exrl,lcdfr,lay,xilf,lcdbr,alsi,mvhhi,srl,chsi,lgfr,lrvg,cly,sgrk,ahi,celgbr,nill,clgdbr,jg,slrk,lxr,sar,slfi,cpsdr,lcgfr,aghik,nilh,mvhi,lpdfr,xy,alrk,lao,agsi,ldy,nilf,llhr,alfi,laog,sly,aghi,ldebr,bras,srda,cefbr,lt,fiebra,fidbra,fixbra,fidtr,fixtr")) "nothing") (define_insn_reservation "zEC12_cgdbr" 2 (and (eq_attr "cpu" "zEC12") @@ -603,3 +603,22 @@ (and (eq_attr "cpu" "zEC12") (eq_attr "mnemonic" "mh")) "nothing") +(define_insn_reservation "zEC12_fiebra" 6 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fiebra")) 
"nothing") + +(define_insn_reservation "zEC12_fidbra" 6 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fidbra")) "nothing") + +(define_insn_reservation "zEC12_fixbra" 10 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fixbra")) "nothing") + +(define_insn_reservation "zEC12_fidtr" 6 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fidtr")) "nothing") + +(define_insn_reservation "zEC12_fixtr" 10 + (and (eq_attr "cpu" "zEC12") + (eq_attr "mnemonic" "fixtr")) "nothing") diff --git a/gcc/config/s390/linux.h b/gcc/config/s390/linux.h index 3b4966a91ff..699b5dfb7e2 100644 --- a/gcc/config/s390/linux.h +++ b/gcc/config/s390/linux.h @@ -87,4 +87,7 @@ along with GCC; see the file COPYING3. If not see /* Define if long doubles should be mangled as 'g'. */ #define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function + #endif diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 95ded7c78e6..cf9ef774675 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -117,6 +117,14 @@ ; Population Count UNSPEC_POPCNT UNSPEC_COPYSIGN + + ; Load FP Integer + UNSPEC_FPINT_FLOOR + UNSPEC_FPINT_BTRUNC + UNSPEC_FPINT_ROUND + UNSPEC_FPINT_CEIL + UNSPEC_FPINT_NEARBYINT + UNSPEC_FPINT_RINT ]) ;; @@ -291,7 +299,7 @@ z196_cracked" (const_string "none")) -(define_attr "mnemonic" "unknown" (const_string "unknown")) +(define_attr "mnemonic" "bcr_flush,unknown" (const_string "unknown")) ;; Length in bytes. @@ -429,9 +437,25 @@ ;; the same template. (define_code_iterator SHIFT [ashift lshiftrt]) -;; This iterator allow r[ox]sbg to be defined with the same template +;; This iterator allows r[ox]sbg to be defined with the same template (define_code_iterator IXOR [ior xor]) +;; This iterator is used to expand the patterns for the nearest +;; integer functions. +(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC + UNSPEC_FPINT_ROUND UNSPEC_FPINT_CEIL + UNSPEC_FPINT_NEARBYINT]) +(define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") + (UNSPEC_FPINT_BTRUNC "btrunc") + (UNSPEC_FPINT_ROUND "round") + (UNSPEC_FPINT_CEIL "ceil") + (UNSPEC_FPINT_NEARBYINT "nearbyint")]) +(define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR "7") + (UNSPEC_FPINT_BTRUNC "5") + (UNSPEC_FPINT_ROUND "1") + (UNSPEC_FPINT_CEIL "6") + (UNSPEC_FPINT_NEARBYINT "0")]) + ;; This iterator and attribute allow to combine most atomic operations. (define_code_iterator ATOMIC [and ior xor plus minus mult]) (define_code_iterator ATOMIC_Z196 [and ior xor plus]) @@ -2289,13 +2313,13 @@ lr\t%0,%1 tmh\t%1,12288 ipm\t%0 - st\t%0,%1 - sty\t%0,%1 - l\t%1,%0 - ly\t%1,%0" + l\t%0,%1 + ly\t%0,%1 + st\t%1,%0 + sty\t%1,%0" [(set_attr "op_type" "RR,RI,RRE,RX,RXY,RX,RXY") - (set_attr "type" "lr,*,*,store,store,load,load") - (set_attr "z10prop" "z10_fr_E1,z10_super,*,z10_rec,z10_rec,z10_fwd_A3,z10_fwd_A3") + (set_attr "type" "lr,*,*,load,load,store,store") + (set_attr "z10prop" "z10_fr_E1,z10_super,*,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec") (set_attr "z196prop" "*,*,z196_ends,*,*,*,*")]) ; @@ -4414,6 +4438,58 @@ [(set_attr "op_type" "RRF") (set_attr "type" "fsimptf")]) +; Binary Floating Point - load fp integer + +; Expanders for: floor, btrunc, round, ceil, and nearbyint +; For all of them the inexact exceptions are suppressed. 
+ +; fiebra, fidbra, fixbra +(define_insn "<FPINT:fpint_name><BFP:mode>2" + [(set (match_operand:BFP 0 "register_operand" "=f") + (unspec:BFP [(match_operand:BFP 1 "register_operand" "f")] + FPINT))] + "TARGET_Z196" + "fi<BFP:xde>bra\t%0,<FPINT:fpint_roundingmode>,%1,4" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp<BFP:mode>")]) + +; rint is supposed to raise an inexact exception so we can use the +; older instructions. + +; fiebr, fidbr, fixbr +(define_insn "rint<BFP:mode>2" + [(set (match_operand:BFP 0 "register_operand" "=f") + (unspec:BFP [(match_operand:BFP 1 "register_operand" "f")] + UNSPEC_FPINT_RINT))] + "" + "fi<BFP:xde>br\t%0,0,%1" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp<BFP:mode>")]) + + +; Decimal Floating Point - load fp integer + +; fidtr, fixtr +(define_insn "<FPINT:fpint_name><DFP:mode>2" + [(set (match_operand:DFP 0 "register_operand" "=f") + (unspec:DFP [(match_operand:DFP 1 "register_operand" "f")] + FPINT))] + "TARGET_HARD_DFP" + "fi<DFP:xde>tr\t%0,<FPINT:fpint_roundingmode>,%1,4" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp<DFP:mode>")]) + +; fidtr, fixtr +(define_insn "rint<DFP:mode>2" + [(set (match_operand:DFP 0 "register_operand" "=f") + (unspec:DFP [(match_operand:DFP 1 "register_operand" "f")] + UNSPEC_FPINT_RINT))] + "TARGET_HARD_DFP" + "fi<DFP:xde>tr\t%0,0,%1,0" + [(set_attr "op_type" "RRF") + (set_attr "type" "fsimp<DFP:mode>")]) + +; ; Binary <-> Decimal floating point trunc patterns ; @@ -9007,12 +9083,22 @@ ; Although bcr is superscalar on Z10, this variant will never ; become part of an execution group. +; With z196 we can make use of the fast-BCR-serialization facility. +; This allows for a slightly faster sync which is sufficient for our +; purposes. (define_insn "mem_thread_fence_1" [(set (match_operand:BLK 0 "" "") (unspec:BLK [(match_dup 0)] UNSPEC_MB))] "" - "bcr\t15,0" - [(set_attr "op_type" "RR")]) +{ + if (TARGET_Z196) + return "bcr\t14,0"; + else + return "bcr\t15,0"; +} + [(set_attr "op_type" "RR") + (set_attr "mnemonic" "bcr_flush") + (set_attr "z196prop" "z196_alone")]) ; ; atomic load/store operations diff --git a/gcc/config/s390/tpf.h b/gcc/config/s390/tpf.h index a2bde82ca79..a1af01b07b6 100644 --- a/gcc/config/s390/tpf.h +++ b/gcc/config/s390/tpf.h @@ -94,9 +94,6 @@ along with GCC; see the file COPYING3. If not see #define ASM_SPEC "%{m31&m64}%{mesa&mzarch}%{march=*} \ -alshd=%b.lst" -#undef TARGET_C99_FUNCTIONS -#define TARGET_C99_FUNCTIONS 1 - #define ENTRY_SPEC "%{mmain:-entry=_start} \ %{!mmain:-entry=0}" @@ -114,4 +111,8 @@ along with GCC; see the file COPYING3. If not see /* IBM copies these libraries over with these names. */ #define MATH_LIBRARY "CLBM" #define LIBSTDCXX "CPP2" + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function + #endif /* ! _TPF_H */ diff --git a/gcc/config/sol2-10.h b/gcc/config/sol2-10.h index 81d0f51e144..9df5548e4c1 100644 --- a/gcc/config/sol2-10.h +++ b/gcc/config/sol2-10.h @@ -18,5 +18,7 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* Solaris 10 has the float and long double forms of math functions. */ -#define TARGET_C99_FUNCTIONS 1 +/* Solaris 10 has the float and long double forms of math functions. + We redefine this hook so the version from elfos.h header won't be used. 
*/ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h index 4c9b334e7a7..b606595dfe9 100644 --- a/gcc/config/sol2.h +++ b/gcc/config/sol2.h @@ -285,6 +285,9 @@ along with GCC; see the file COPYING3. If not see #define TARGET_POSIX_IO +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + extern GTY(()) tree solaris_pending_aligns; extern GTY(()) tree solaris_pending_inits; extern GTY(()) tree solaris_pending_finis; diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 48c25dcd5cd..e5b4662512d 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see #include "df.h" #include "opts.h" #include "tree-pass.h" +#include "context.h" /* Processor costs */ @@ -1000,34 +1001,44 @@ sparc_do_work_around_errata (void) return 0; } -struct rtl_opt_pass pass_work_around_errata = -{ - { - RTL_PASS, - "errata", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - sparc_gate_work_around_errata, /* gate */ - sparc_do_work_around_errata, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_MACH_DEP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ - } -}; +namespace { -struct register_pass_info insert_pass_work_around_errata = +const pass_data pass_data_work_around_errata = { - &pass_work_around_errata.pass, /* pass */ - "dbr", /* reference_pass_name */ - 1, /* ref_pass_instance_number */ - PASS_POS_INSERT_AFTER /* po_op */ + RTL_PASS, /* type */ + "errata", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ }; +class pass_work_around_errata : public rtl_opt_pass +{ +public: + pass_work_around_errata(gcc::context *ctxt) + : rtl_opt_pass(pass_data_work_around_errata, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return sparc_gate_work_around_errata (); } + unsigned int execute () { return sparc_do_work_around_errata (); } + +}; // class pass_work_around_errata + +} // anon namespace + +rtl_opt_pass * +make_pass_work_around_errata (gcc::context *ctxt) +{ + return new pass_work_around_errata (ctxt); +} + /* Helpers for TARGET_DEBUG_OPTIONS. */ static void dump_target_flag_bits (const int flags) @@ -1140,9 +1151,8 @@ sparc_option_override (void) /* TI TMS390Z55 supersparc */ { "supersparc", MASK_ISA, MASK_V8 }, { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU }, - /* LEON */ - { "leon", MASK_ISA, MASK_V8|MASK_FPU }, - { "leon3", MASK_ISA, MASK_V8|MASK_FPU }, + { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU }, + { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU }, { "sparclite", MASK_ISA, MASK_SPARCLITE }, /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, @@ -1302,6 +1312,9 @@ sparc_option_override (void) #ifndef HAVE_AS_SPARC4 & ~MASK_CBCOND #endif +#ifndef HAVE_AS_LEON + & ~(MASK_LEON | MASK_LEON3) +#endif ); /* If -mfpu or -mno-fpu was explicitly used, don't override with @@ -1430,6 +1443,10 @@ sparc_option_override (void) /* Choose the most relaxed model for the processor. 
*/ else if (TARGET_V9) sparc_memory_model = SMM_RMO; + else if (TARGET_LEON3) + sparc_memory_model = SMM_TSO; + else if (TARGET_LEON) + sparc_memory_model = SMM_SC; else if (TARGET_V8) sparc_memory_model = SMM_PSO; else @@ -1477,6 +1494,14 @@ sparc_option_override (void) (essentially) final form of the insn stream to work on. Registering the pass must be done at start up. It's convenient to do it here. */ + opt_pass *errata_pass = make_pass_work_around_errata (g); + struct register_pass_info insert_pass_work_around_errata = + { + errata_pass, /* pass */ + "dbr", /* reference_pass_name */ + 1, /* ref_pass_instance_number */ + PASS_POS_INSERT_AFTER /* po_op */ + }; register_pass (&insert_pass_work_around_errata); } @@ -11318,6 +11343,11 @@ sparc_emit_membar_for_model (enum memmodel model, /* Total Store Ordering: all memory transactions with store semantics are followed by an implied StoreStore. */ implied |= StoreStore; + + /* If we're not looking for a raw barrer (before+after), then atomic + operations get the benefit of being both load and store. */ + if (load_store == 3 && before_after == 1) + implied |= StoreLoad; /* FALLTHRU */ case SMM_PSO: diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 202d23c0162..d96c1b6b422 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -236,7 +236,7 @@ extern enum cmodel sparc_cmodel; #if TARGET_CPU_DEFAULT == TARGET_CPU_leon \ || TARGET_CPU_DEFAULT == TARGET_CPU_leon3 #define CPP_CPU32_DEFAULT_SPEC "-D__leon__ -D__sparc_v8__" -#define ASM_CPU32_DEFAULT_SPEC "" +#define ASM_CPU32_DEFAULT_SPEC AS_LEON_FLAG #endif #endif @@ -332,8 +332,8 @@ extern enum cmodel sparc_cmodel; %{mcpu=v8:-Av8} \ %{mcpu=supersparc:-Av8} \ %{mcpu=hypersparc:-Av8} \ -%{mcpu=leon:-Av8} \ -%{mcpu=leon3:-Av8} \ +%{mcpu=leon:" AS_LEON_FLAG "} \ +%{mcpu=leon3:" AS_LEON_FLAG "} \ %{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ @@ -1758,6 +1758,12 @@ extern int sparc_indent_opcode; #define AS_NIAGARA4_FLAG "-Av9" AS_NIAGARA3_FLAG #endif +#ifdef HAVE_AS_LEON +#define AS_LEON_FLAG "-Aleon" +#else +#define AS_LEON_FLAG "-Av8" +#endif + /* We use gcc _mcount for profiling. 
*/ #define NO_PROFILE_COUNTERS 0 diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt index 016e6997422..3ccd54fa463 100644 --- a/gcc/config/sparc/sparc.opt +++ b/gcc/config/sparc/sparc.opt @@ -211,6 +211,12 @@ Enable workarounds for the errata of the UT699 processor Mask(LONG_DOUBLE_128) ;; Use 128-bit long double +Mask(LEON) +;; Generate code for LEON + +Mask(LEON3) +;; Generate code for LEON3 + Mask(SPARCLITE) ;; Generate code for SPARClite diff --git a/gcc/config/sparc/sync.md b/gcc/config/sparc/sync.md index 2f21f812dc9..130f5219194 100644 --- a/gcc/config/sparc/sync.md +++ b/gcc/config/sparc/sync.md @@ -161,7 +161,8 @@ (match_operand:SI 5 "const_int_operand" "") ;; is_weak (match_operand:SI 6 "const_int_operand" "") ;; mod_s (match_operand:SI 7 "const_int_operand" "")] ;; mod_f - "TARGET_V9 && (<MODE>mode != DImode || TARGET_ARCH64 || TARGET_V8PLUS)" + "(TARGET_V9 || TARGET_LEON3) + && (<MODE>mode != DImode || TARGET_ARCH64 || TARGET_V8PLUS)" { sparc_expand_compare_and_swap (operands); DONE; @@ -176,7 +177,7 @@ [(match_operand:I48MODE 2 "register_operand" "") (match_operand:I48MODE 3 "register_operand" "")] UNSPECV_CAS))])] - "TARGET_V9" + "TARGET_V9 || TARGET_LEON3" "") (define_insn "*atomic_compare_and_swap<mode>_1" @@ -187,7 +188,7 @@ [(match_operand:I48MODE 2 "register_operand" "r") (match_operand:I48MODE 3 "register_operand" "0")] UNSPECV_CAS))] - "TARGET_V9 && (<MODE>mode == SImode || TARGET_ARCH64)" + "(TARGET_V9 || TARGET_LEON3) && (<MODE>mode != DImode || TARGET_ARCH64)" "cas<modesuffix>\t%1, %2, %0" [(set_attr "type" "multi")]) diff --git a/gcc/config/sparc/t-sparc b/gcc/config/sparc/t-sparc index 664f4a42418..62ad3f77934 100644 --- a/gcc/config/sparc/t-sparc +++ b/gcc/config/sparc/t-sparc @@ -24,7 +24,7 @@ sparc.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(FUNCTION_H) $(EXCEPT_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) \ $(DIAGNOSTIC_CORE_H) $(GGC_H) $(TM_P_H) debug.h $(TARGET_H) \ $(TARGET_DEF_H) $(COMMON_TARGET_H) $(GIMPLE_H) $(TREE_PASS_H) \ - langhooks.h reload.h $(PARAMS_H) $(DF_H) $(OPTS_H) \ + langhooks.h reload.h $(PARAMS_H) $(DF_H) $(OPTS_H) $(CONTEXT_H) \ gt-sparc.h sparc-c.o: $(srcdir)/config/sparc/sparc-c.c \ diff --git a/gcc/config/vms/vms.h b/gcc/config/vms/vms.h index b7689bfa674..5d0a5c6515c 100644 --- a/gcc/config/vms/vms.h +++ b/gcc/config/vms/vms.h @@ -87,3 +87,6 @@ extern void vms_c_register_includes (const char *, const char *, int); /* Special VMS debugger symbol to record the entry point. */ #define VMS_DEBUG_MAIN_POINTER "TRANSFER$BREAK$GO" + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h index d91a8b103ac..72f344b6f01 100644 --- a/gcc/config/vxworks.h +++ b/gcc/config/vxworks.h @@ -114,6 +114,9 @@ extern void vxworks_asm_out_destructor (rtx symbol, int priority); #undef SIZE_TYPE #define SIZE_TYPE "unsigned int" +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + /* Both kernels and RTPs have the facilities required by this macro. */ #define TARGET_POSIX_IO |
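Circling back to the Power8 GPR load fusion added in rs6000.c and rs6000.md above, a minimal sketch of the code it targets, assuming a powerpc64 ELF target compiled with -mcpu=power8 (which is expected to enable the fusion mask); the register numbers and assembly are illustrative, derived from the addis/load templates in emit_fusion_gpr_load rather than from real compiler output:

/* A TOC-relative load of a global: the addis/ld pair that fusion_gpr_load_p
   recognizes.  The peephole2 first retargets the addis to the destination
   register (r3 here) when the original addis register is dead, and the
   define_peephole then prints the fused sequence, roughly:

       addis 3,2,x@toc@ha      # gpr load fusion, type long
       ld 3,x@toc@l(3)

   Registers and the precise relocation syntax are illustrative.  */
extern long x;

long
get_x (void)
{
  return x;
}

Per the comments in the patch, keeping the two instructions physically adjacent and targeting the same register is what allows power8 to dispatch them as a fused operation, which is why this is done with peepholes at the very end of compilation rather than with an ordinary combine pattern.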