author     mrs <mrs@138bc75d-0d04-0410-961f-82ee72b054a4>  2014-04-30 17:19:02 +0000
committer  mrs <mrs@138bc75d-0d04-0410-961f-82ee72b054a4>  2014-04-30 17:19:02 +0000
commit     951019127b9f3cf59989ceb74539d454d535d3b5 (patch)
tree       958443e6076f37a3ff7d07d9303f5c4171944638 /gcc/config
parent     bedbed50ff969478b0c8878678d7c169a8a0cdaf (diff)
parent     8a21d39cf1ea8183eb1e95bbb4396d97ae0dc36d (diff)
download   gcc-951019127b9f3cf59989ceb74539d454d535d3b5.tar.gz
Merge in trunk.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/wide-int@209944 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c | 5
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 2
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 4
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 84
-rw-r--r--  gcc/config/aarch64/aarch64.c | 33
-rw-r--r--  gcc/config/aarch64/aarch64.h | 3
-rw-r--r--  gcc/config/aarch64/aarch64.md | 22
-rw-r--r--  gcc/config/aarch64/arm_neon.h | 2136
-rw-r--r--  gcc/config/aarch64/iterators.md | 26
-rw-r--r--  gcc/config/arc/arc.c | 22
-rw-r--r--  gcc/config/arc/arc.opt | 2
-rw-r--r--  gcc/config/i386/sol2-bi.h | 109
-rw-r--r--  gcc/config/i386/sol2.h | 101
-rw-r--r--  gcc/config/i386/t-sol2 (renamed from gcc/config/i386/t-sol2-64) | 0
-rw-r--r--  gcc/config/m32c/m32c.c | 2
-rw-r--r--  gcc/config/m32r/m32r.c | 20
-rw-r--r--  gcc/config/msp430/msp430.c | 2
-rw-r--r--  gcc/config/msp430/msp430.md | 4
-rw-r--r--  gcc/config/rs6000/predicates.md | 4
-rw-r--r--  gcc/config/rs6000/rs6000.md | 1
-rw-r--r--  gcc/config/rs6000/sync.md | 105
-rw-r--r--  gcc/config/sol2-10.h | 24
-rw-r--r--  gcc/config/sol2-bi.h | 135
-rw-r--r--  gcc/config/sol2.h | 149
-rw-r--r--  gcc/config/sparc/sol2.h | 32
-rw-r--r--  gcc/config/sparc/t-sol2 (renamed from gcc/config/sparc/t-sol2-64) | 0
26 files changed, 1590 insertions(+), 1437 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4616ad24c07..a3019828a93 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -246,6 +246,11 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_void, qualifier_pointer_map_mode,
+ qualifier_none, qualifier_none };
+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 5542f023b33..04cbc780da2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -175,6 +175,8 @@ bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
bool aarch64_is_long_call_p (rtx);
bool aarch64_label_mentioned_p (rtx);
bool aarch64_legitimate_pic_operand_p (rtx);
+bool aarch64_modes_tieable_p (enum machine_mode mode1,
+ enum machine_mode mode2);
bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
enum machine_mode);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index fa332ae5948..339e8f86a4b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -118,6 +118,10 @@
BUILTIN_VQ (STORESTRUCT, st3, 0)
BUILTIN_VQ (STORESTRUCT, st4, 0)
+ BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0)
+ BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0)
+ BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0)
+
BUILTIN_VQW (BINOP, saddl2, 0)
BUILTIN_VQW (BINOP, uaddl2, 0)
BUILTIN_VQW (BINOP, ssubl2, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c05767b2045..108bc8d8893 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3686,6 +3686,17 @@
[(set_attr "type" "neon_store2_2reg<q>")]
)
+(define_insn "vec_store_lanesoi_lane<mode>"
+ [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_ST2_LANE))]
+ "TARGET_SIMD"
+ "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
+ [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
(define_insn "vec_load_lanesci<mode>"
[(set (match_operand:CI 0 "register_operand" "=w")
(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
@@ -3706,6 +3717,17 @@
[(set_attr "type" "neon_store3_3reg<q>")]
)
+(define_insn "vec_store_lanesci_lane<mode>"
+ [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_ST3_LANE))]
+ "TARGET_SIMD"
+ "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
+ [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
(define_insn "vec_load_lanesxi<mode>"
[(set (match_operand:XI 0 "register_operand" "=w")
(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
@@ -3726,6 +3748,17 @@
[(set_attr "type" "neon_store4_4reg<q>")]
)
+(define_insn "vec_store_lanesxi_lane<mode>"
+ [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_ST4_LANE))]
+ "TARGET_SIMD"
+ "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
+ [(set_attr "type" "neon_store4_one_lane<q>")]
+)
+
;; Reload patterns for AdvSIMD register list operands.
(define_expand "mov<mode>"
@@ -4220,6 +4253,57 @@
DONE;
})
+(define_expand "aarch64_st2_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:OI 1 "register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ enum machine_mode mode = <V_TWO_ELEM>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[0]);
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+ emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
+ operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "aarch64_st3_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:CI 1 "register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ enum machine_mode mode = <V_THREE_ELEM>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[0]);
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+ emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
+ operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "aarch64_st4_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:XI 1 "register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ enum machine_mode mode = <V_FOUR_ELEM>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[0]);
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+ emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
+ operands[1],
+ operands[2]));
+ DONE;
+})
+
(define_expand "aarch64_st1<VALL:mode>"
[(match_operand:DI 0 "register_operand")
(match_operand:VALL 1 "register_operand")]
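
As a point of reference for the new aarch64_st2/st3/st4_lane expanders and vec_store_lanes*_lane patterns above, a minimal C example that reaches them once the arm_neon.h changes later in this diff are in place; the exact instruction shown is an assumption about little-endian code generation:

#include <arm_neon.h>

/* Stores lane 1 of each vector of the pair to consecutive int32_t slots.
   With this patch the call expands through __builtin_aarch64_st2_lanev4si
   and the new vec_store_lanesoi_lane pattern, emitting roughly
   st2 {v0.s - v1.s}[1], [x0].  */
void
store_lane1_pair (int32_t *p, int32x4x2_t v)
{
  vst2q_lane_s32 (p, v, 1);
}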
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1d48108516d..d3d7d1e60d6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8316,7 +8316,8 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
/* Limited combinations of subregs are safe on FPREGs. Particularly,
1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
2. Scalar to Scalar for integer modes or same size float modes.
- 3. Vector to Vector modes. */
+ 3. Vector to Vector modes.
+ 4. On little-endian only, Vector-Structure to Vector modes. */
if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
{
if (aarch64_vector_mode_supported_p (from)
@@ -8332,11 +8333,41 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
if (aarch64_vector_mode_supported_p (from)
&& aarch64_vector_mode_supported_p (to))
return false;
+
+ /* Within a vector structure straddling multiple vector registers
+ we are in a mixed-endian representation. As such, we can't
+ easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
+ switch between vectors and vector structures cheaply. */
+ if (!BYTES_BIG_ENDIAN)
+ if ((aarch64_vector_mode_supported_p (from)
+ && aarch64_vect_struct_mode_p (to))
+ || (aarch64_vector_mode_supported_p (to)
+ && aarch64_vect_struct_mode_p (from)))
+ return false;
}
return true;
}
+/* Implement MODES_TIEABLE_P. */
+
+bool
+aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
+ return true;
+
+ /* We specifically want to allow elements of "structure" modes to
+ be tieable to the structure. This more general condition allows
+ other rarer situations too. */
+ if (TARGET_SIMD
+ && aarch64_vector_mode_p (mode1)
+ && aarch64_vector_mode_p (mode2))
+ return true;
+
+ return false;
+}
+
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index e2b6c8e2908..c9b30d01865 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -365,8 +365,7 @@ extern unsigned long aarch64_tune_flags;
#define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE)
-#define MODES_TIEABLE_P(MODE1, MODE2) \
- (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
+#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2)
#define DWARF2_UNWIND_INFO 1
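
A hedged illustration of what the relaxed CANNOT_CHANGE_MODE_CLASS rule and the new aarch64_modes_tieable_p / MODES_TIEABLE_P definition are aimed at; the function and the claimed register-allocation effect are illustrative assumptions, not taken from the patch:

#include <arm_neon.h>

/* vld2q_s32 yields an OImode structure value; .val[0] reads it back as a
   V4SImode subreg.  On little-endian the structure mode and the vector
   mode are now tieable, so the extract need not force a copy out of the
   structure register pair.  */
int32x4_t
first_of_pair (const int32_t *p)
{
  return vld2q_s32 (p).val[0];
}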
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7965db4c9c7..266d7873a5a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -98,6 +98,9 @@
UNSPEC_ST2
UNSPEC_ST3
UNSPEC_ST4
+ UNSPEC_ST2_LANE
+ UNSPEC_ST3_LANE
+ UNSPEC_ST4_LANE
UNSPEC_TLS
UNSPEC_TLSDESC
UNSPEC_USHL_2S
@@ -2426,6 +2429,25 @@
}
)
+(define_expand "mov<mode>cc"
+ [(set (match_operand:GPF 0 "register_operand" "")
+ (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "")
+ (match_operand:GPF 2 "register_operand" "")
+ (match_operand:GPF 3 "register_operand" "")))]
+ ""
+ {
+ rtx ccreg;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ }
+)
+
(define_insn "*csinc2<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
(plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator"
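
The mov<mode>cc expander added above gives floating-point conditional moves a direct expansion path; a small example of source that can use it (whether a branch-free fcmp/fcsel sequence is actually emitted depends on optimization settings, so that outcome is an assumption):

/* The ternary below can now expand as a floating-point conditional
   select via mov<mode>cc; UNEQ and LTGT comparisons still FAIL and fall
   back to the generic expansion.  */
double
select_greater (double a, double b)
{
  return a > b ? a : b;
}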
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 9f1fa98e6fb..f6213ce2aea 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -13199,929 +13199,6 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
: /* No clobbers */);
return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vuzp1_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vuzp1_p8 (poly8x8_t a, poly8x8_t b)
-{
- poly8x8_t result;
- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vuzp1_p16 (poly16x4_t a, poly16x4_t b)
-{
- poly16x4_t result;
- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vuzp1_s8 (int8x8_t a, int8x8_t b)
-{
- int8x8_t result;
- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vuzp1_s16 (int16x4_t a, int16x4_t b)
-{
- int16x4_t result;
- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vuzp1_s32 (int32x2_t a, int32x2_t b)
-{
- int32x2_t result;
- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vuzp1_u8 (uint8x8_t a, uint8x8_t b)
-{
- uint8x8_t result;
- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vuzp1_u16 (uint16x4_t a, uint16x4_t b)
-{
- uint16x4_t result;
- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vuzp1_u32 (uint32x2_t a, uint32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vuzp1q_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vuzp1q_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
-{
- poly8x16_t result;
- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
-{
- poly16x8_t result;
- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vuzp1q_s8 (int8x16_t a, int8x16_t b)
-{
- int8x16_t result;
- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vuzp1q_s16 (int16x8_t a, int16x8_t b)
-{
- int16x8_t result;
- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vuzp1q_s32 (int32x4_t a, int32x4_t b)
-{
- int32x4_t result;
- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vuzp1q_s64 (int64x2_t a, int64x2_t b)
-{
- int64x2_t result;
- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
-{
- uint8x16_t result;
- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
-{
- uint16x8_t result;
- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vuzp2_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vuzp2_p8 (poly8x8_t a, poly8x8_t b)
-{
- poly8x8_t result;
- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vuzp2_p16 (poly16x4_t a, poly16x4_t b)
-{
- poly16x4_t result;
- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vuzp2_s8 (int8x8_t a, int8x8_t b)
-{
- int8x8_t result;
- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vuzp2_s16 (int16x4_t a, int16x4_t b)
-{
- int16x4_t result;
- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vuzp2_s32 (int32x2_t a, int32x2_t b)
-{
- int32x2_t result;
- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vuzp2_u8 (uint8x8_t a, uint8x8_t b)
-{
- uint8x8_t result;
- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vuzp2_u16 (uint16x4_t a, uint16x4_t b)
-{
- uint16x4_t result;
- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vuzp2_u32 (uint32x2_t a, uint32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vuzp2q_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vuzp2q_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
-{
- poly8x16_t result;
- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
-{
- poly16x8_t result;
- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vuzp2q_s8 (int8x16_t a, int8x16_t b)
-{
- int8x16_t result;
- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vuzp2q_s16 (int16x8_t a, int16x8_t b)
-{
- int16x8_t result;
- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vuzp2q_s32 (int32x4_t a, int32x4_t b)
-{
- int32x4_t result;
- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vuzp2q_s64 (int64x2_t a, int64x2_t b)
-{
- int64x2_t result;
- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
-{
- uint8x16_t result;
- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
-{
- uint16x8_t result;
- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vzip1_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vzip1_p8 (poly8x8_t a, poly8x8_t b)
-{
- poly8x8_t result;
- __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vzip1_p16 (poly16x4_t a, poly16x4_t b)
-{
- poly16x4_t result;
- __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vzip1_s8 (int8x8_t a, int8x8_t b)
-{
- int8x8_t result;
- __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vzip1_s16 (int16x4_t a, int16x4_t b)
-{
- int16x4_t result;
- __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vzip1_s32 (int32x2_t a, int32x2_t b)
-{
- int32x2_t result;
- __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vzip1_u8 (uint8x8_t a, uint8x8_t b)
-{
- uint8x8_t result;
- __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vzip1_u16 (uint16x4_t a, uint16x4_t b)
-{
- uint16x4_t result;
- __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vzip1_u32 (uint32x2_t a, uint32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vzip1q_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vzip1q_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vzip1q_p8 (poly8x16_t a, poly8x16_t b)
-{
- poly8x16_t result;
- __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vzip1q_p16 (poly16x8_t a, poly16x8_t b)
-{
- poly16x8_t result;
- __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vzip1q_s8 (int8x16_t a, int8x16_t b)
-{
- int8x16_t result;
- __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vzip1q_s16 (int16x8_t a, int16x8_t b)
-{
- int16x8_t result;
- __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vzip1q_s32 (int32x4_t a, int32x4_t b)
-{
- int32x4_t result;
- __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vzip1q_s64 (int64x2_t a, int64x2_t b)
-{
- int64x2_t result;
- __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vzip1q_u8 (uint8x16_t a, uint8x16_t b)
-{
- uint8x16_t result;
- __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vzip1q_u16 (uint16x8_t a, uint16x8_t b)
-{
- uint16x8_t result;
- __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vzip1q_u32 (uint32x4_t a, uint32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vzip1q_u64 (uint64x2_t a, uint64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vzip2_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vzip2_p8 (poly8x8_t a, poly8x8_t b)
-{
- poly8x8_t result;
- __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vzip2_p16 (poly16x4_t a, poly16x4_t b)
-{
- poly16x4_t result;
- __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vzip2_s8 (int8x8_t a, int8x8_t b)
-{
- int8x8_t result;
- __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vzip2_s16 (int16x4_t a, int16x4_t b)
-{
- int16x4_t result;
- __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vzip2_s32 (int32x2_t a, int32x2_t b)
-{
- int32x2_t result;
- __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vzip2_u8 (uint8x8_t a, uint8x8_t b)
-{
- uint8x8_t result;
- __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vzip2_u16 (uint16x4_t a, uint16x4_t b)
-{
- uint16x4_t result;
- __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vzip2_u32 (uint32x2_t a, uint32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vzip2q_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vzip2q_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vzip2q_p8 (poly8x16_t a, poly8x16_t b)
-{
- poly8x16_t result;
- __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vzip2q_p16 (poly16x8_t a, poly16x8_t b)
-{
- poly16x8_t result;
- __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vzip2q_s8 (int8x16_t a, int8x16_t b)
-{
- int8x16_t result;
- __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vzip2q_s16 (int16x8_t a, int16x8_t b)
-{
- int16x8_t result;
- __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vzip2q_s32 (int32x4_t a, int32x4_t b)
-{
- int32x4_t result;
- __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vzip2q_s64 (int64x2_t a, int64x2_t b)
-{
- int64x2_t result;
- __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vzip2q_u8 (uint8x16_t a, uint8x16_t b)
-{
- uint8x16_t result;
- __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vzip2q_u16 (uint16x8_t a, uint16x8_t b)
-{
- uint16x8_t result;
- __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vzip2q_u32 (uint32x4_t a, uint32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vzip2q_u64 (uint64x2_t a, uint64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
/* End of temporary inline asm implementations. */
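
The inline-asm uzp/zip helpers deleted above come back near the end of this diff as __builtin_shuffle definitions; a rough sketch of that replacement pattern (the optimization rationale is a general property of __builtin_shuffle, not something stated in the patch):

#include <arm_neon.h>

/* Same selection as the new little-endian vuzp1_s32 further below: take
   the even-numbered lanes of the concatenation of a and b.  Written as a
   __builtin_shuffle, the permute is visible to the optimizers instead of
   being hidden inside an asm block.  */
static __inline int32x2_t
uzp1_even_lanes (int32x2_t a, int32x2_t b)
{
  return __builtin_shuffle (a, b, (uint32x2_t) {0, 2});
}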
@@ -14452,131 +13529,224 @@ __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
-#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
- lnsuffix, funcsuffix, Q) \
- typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \
- __extension__ static __inline void \
- __attribute__ ((__always_inline__)) \
- vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
- intype b, const int c) \
- { \
- __ST2_LANE_STRUCTURE_##intype *__p = \
- (__ST2_LANE_STRUCTURE_##intype *)ptr; \
- __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
- "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
- : "=Q"(*__p) \
- : "Q"(b), "i"(c) \
- : "v16", "v17"); \
- }
-
-__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
-__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
-__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
-__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
-__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
-__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
-__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
-__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
-__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
-__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
-__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
-__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
-__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
-__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
-__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
-__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
-__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
-__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
-__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
-__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
-__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
-__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
-__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
-__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
-
-#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
- lnsuffix, funcsuffix, Q) \
- typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \
- __extension__ static __inline void \
- __attribute__ ((__always_inline__)) \
- vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
- intype b, const int c) \
- { \
- __ST3_LANE_STRUCTURE_##intype *__p = \
- (__ST3_LANE_STRUCTURE_##intype *)ptr; \
- __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
- "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
- : "=Q"(*__p) \
- : "Q"(b), "i"(c) \
- : "v16", "v17", "v18"); \
- }
-
-__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
-__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
-__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
-__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
-__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
-__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
-__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
-__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
-__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
-__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
-__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
-__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
-__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
-__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
-__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
-__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
-__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
-__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
-__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
-__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
-__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
-__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
-__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
-__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
-
-#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
- lnsuffix, funcsuffix, Q) \
- typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \
- __extension__ static __inline void \
- __attribute__ ((__always_inline__)) \
- vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
- intype b, const int c) \
- { \
- __ST4_LANE_STRUCTURE_##intype *__p = \
- (__ST4_LANE_STRUCTURE_##intype *)ptr; \
- __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
- "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
- : "=Q"(*__p) \
- : "Q"(b), "i"(c) \
- : "v16", "v17", "v18", "v19"); \
- }
-
-__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
-__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
-__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
-__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
-__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
-__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
-__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
-__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
-__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
-__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
-__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
-__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
-__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
-__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
-__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
-__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
-__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
-__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
-__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
-__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
-__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
-__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
-__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
-__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
+#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \
+ mode, ptr_mode, funcsuffix, signedtype) \
+__extension__ static __inline void \
+__attribute__ ((__always_inline__)) \
+vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
+ intype __b, const int __c) \
+{ \
+ __builtin_aarch64_simd_oi __o; \
+ largetype __temp; \
+ __temp.val[0] \
+ = vcombine_##funcsuffix (__b.val[0], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __temp.val[1] \
+ = vcombine_##funcsuffix (__b.val[1], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __o = __builtin_aarch64_set_qregoi##mode (__o, \
+ (signedtype) __temp.val[0], 0); \
+ __o = __builtin_aarch64_set_qregoi##mode (__o, \
+ (signedtype) __temp.val[1], 1); \
+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+ __ptr, __o, __c); \
+}
+
+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
+ float32x4_t)
+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
+ float64x2_t)
+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
+ int16x8_t)
+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
+ int16x8_t)
+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
+ int32x4_t)
+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
+ int64x2_t)
+
+#undef __ST2_LANE_FUNC
+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
+__extension__ static __inline void \
+__attribute__ ((__always_inline__)) \
+vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
+ intype __b, const int __c) \
+{ \
+ union { intype __i; \
+ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+ __ptr, __temp.__o, __c); \
+}
+
+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
+
+#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \
+ mode, ptr_mode, funcsuffix, signedtype) \
+__extension__ static __inline void \
+__attribute__ ((__always_inline__)) \
+vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
+ intype __b, const int __c) \
+{ \
+ __builtin_aarch64_simd_ci __o; \
+ largetype __temp; \
+ __temp.val[0] \
+ = vcombine_##funcsuffix (__b.val[0], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __temp.val[1] \
+ = vcombine_##funcsuffix (__b.val[1], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __temp.val[2] \
+ = vcombine_##funcsuffix (__b.val[2], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __o = __builtin_aarch64_set_qregci##mode (__o, \
+ (signedtype) __temp.val[0], 0); \
+ __o = __builtin_aarch64_set_qregci##mode (__o, \
+ (signedtype) __temp.val[1], 1); \
+ __o = __builtin_aarch64_set_qregci##mode (__o, \
+ (signedtype) __temp.val[2], 2); \
+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+ __ptr, __o, __c); \
+}
+
+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
+ float32x4_t)
+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
+ float64x2_t)
+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
+ int16x8_t)
+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
+ int16x8_t)
+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
+ int32x4_t)
+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
+ int64x2_t)
+
+#undef __ST3_LANE_FUNC
+#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
+__extension__ static __inline void \
+__attribute__ ((__always_inline__)) \
+vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
+ intype __b, const int __c) \
+{ \
+ union { intype __i; \
+ __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+ __ptr, __temp.__o, __c); \
+}
+
+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
+
+#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \
+ mode, ptr_mode, funcsuffix, signedtype) \
+__extension__ static __inline void \
+__attribute__ ((__always_inline__)) \
+vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
+ intype __b, const int __c) \
+{ \
+ __builtin_aarch64_simd_xi __o; \
+ largetype __temp; \
+ __temp.val[0] \
+ = vcombine_##funcsuffix (__b.val[0], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __temp.val[1] \
+ = vcombine_##funcsuffix (__b.val[1], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __temp.val[2] \
+ = vcombine_##funcsuffix (__b.val[2], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __temp.val[3] \
+ = vcombine_##funcsuffix (__b.val[3], \
+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
+ (signedtype) __temp.val[0], 0); \
+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
+ (signedtype) __temp.val[1], 1); \
+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
+ (signedtype) __temp.val[2], 2); \
+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
+ (signedtype) __temp.val[3], 3); \
+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+ __ptr, __o, __c); \
+}
+
+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
+ float32x4_t)
+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
+ float64x2_t)
+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
+ int16x8_t)
+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
+ int16x8_t)
+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
+ int32x4_t)
+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
+ int64x2_t)
+
+#undef __ST4_LANE_FUNC
+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
+__extension__ static __inline void \
+__attribute__ ((__always_inline__)) \
+vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
+ intype __b, const int __c) \
+{ \
+ union { intype __i; \
+ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+ __ptr, __temp.__o, __c); \
+}
+
+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlv_s32 (int32x2_t a)
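
For the rewritten 64-bit (d-register) store-lane wrappers, which now widen their arguments with vcombine before passing a register-list value to the builtin, a hedged usage sketch; the byte-interleaving framing is only an illustration:

#include <arm_neon.h>

/* Writes lane 0 of each of the three 8-lane vectors as three consecutive
   bytes.  With this patch the call goes through the vcombine/set_qregci
   path of the new __ST3_LANE_FUNC definition above rather than the old
   ld1/st3 asm sequence.  */
void
store_lane0_of_three (uint8_t *dst, uint8x8x3_t planes)
{
  vst3_lane_u8 (dst, planes, 0);
}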
@@ -25614,10 +24784,880 @@ vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
/* vuzp */
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vuzp1_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vuzp1_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vuzp1_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vuzp1_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vuzp2_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vuzp2_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vuzp2_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vuzp2_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
__INTERLEAVE_LIST (uzp)
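The vuzp1*/vuzp2* definitions above de-interleave two vectors: vuzp1 collects the even-numbered lanes of both inputs, vuzp2 the odd-numbered lanes; the __AARCH64EB__ variants use different shuffle masks because GCC numbers vector elements in the opposite order on big-endian, so both layouts produce the same architectural result. A minimal C sketch of the observable behaviour, assuming an AArch64 target using this arm_neon.h:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int32x4_t a = {0, 1, 2, 3};
  int32x4_t b = {4, 5, 6, 7};

  int32x4_t ev = vuzp1q_s32 (a, b);   /* even lanes: {0, 2, 4, 6}  */
  int32x4_t od = vuzp2q_s32 (a, b);   /* odd lanes:  {1, 3, 5, 7}  */

  printf ("%d %d %d %d\n", vgetq_lane_s32 (ev, 0), vgetq_lane_s32 (ev, 1),
	  vgetq_lane_s32 (ev, 2), vgetq_lane_s32 (ev, 3));
  printf ("%d %d %d %d\n", vgetq_lane_s32 (od, 0), vgetq_lane_s32 (od, 1),
	  vgetq_lane_s32 (od, 2), vgetq_lane_s32 (od, 3));
  return 0;
}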
/* vzip */
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vzip1_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vzip1_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vzip1_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vzip1_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vzip1q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vzip1q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t)
+ {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vzip1q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vzip1q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t)
+ {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vzip1q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vzip1q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t)
+ {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vzip2_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vzip2_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vzip2_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vzip2_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vzip2q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vzip2q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t)
+ {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vzip2q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vzip2q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t)
+ {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vzip2q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vzip2q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x8_t)
+ {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
__INTERLEAVE_LIST (zip)
#undef __INTERLEAVE_LIST
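The vzip1*/vzip2* definitions above are the interleaving counterparts: vzip1 interleaves the low halves of the two inputs, vzip2 the high halves, again with swapped masks under __AARCH64EB__. A minimal C sketch under the same assumptions as the vuzp example earlier:

#include <arm_neon.h>

/* With a = {0, 1, 2, 3} and b = {4, 5, 6, 7}, returns {0, 4, 1, 5}.  */
int32x4_t
interleave_lo (int32x4_t a, int32x4_t b)
{
  return vzip1q_s32 (a, b);
}

/* Same inputs: returns {2, 6, 3, 7}.  */
int32x4_t
interleave_hi (int32x4_t a, int32x4_t b)
{
  return vzip2q_s32 (a, b);
}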
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fd1eb482f0f..c537c3780ee 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -564,6 +564,32 @@
(define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")])
+;; Mode of pair of elements for each vector mode, to define transfer
+;; size for structure lane/dup loads and stores.
+(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI")
+ (V4HI "SI") (V8HI "SI")
+ (V2SI "V2SI") (V4SI "V2SI")
+ (DI "V2DI") (V2DI "V2DI")
+ (V2SF "V2SF") (V4SF "V2SF")
+ (DF "V2DI") (V2DF "V2DI")])
+
+;; Similar, for three elements.
+(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK")
+ (V4HI "BLK") (V8HI "BLK")
+ (V2SI "BLK") (V4SI "BLK")
+ (DI "EI") (V2DI "EI")
+ (V2SF "BLK") (V4SF "BLK")
+ (DF "EI") (V2DF "EI")])
+
+;; Similar, for four elements.
+(define_mode_attr V_FOUR_ELEM [(V8QI "SI") (V16QI "SI")
+ (V4HI "V4HI") (V8HI "V4HI")
+ (V2SI "V4SI") (V4SI "V4SI")
+ (DI "OI") (V2DI "OI")
+ (V2SF "V4SF") (V4SF "V4SF")
+ (DF "OI") (V2DF "OI")])
+
+
;; Mode for atomic operation suffixes
(define_mode_attr atomic_sfx
[(QI "b") (HI "h") (SI "") (DI "")])
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 987a7c9a55a..58d95d832c5 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -996,7 +996,7 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
if (GET_MODE_CLASS (mode) == MODE_INT
&& y == const0_rtx
&& (op == EQ || op == NE
- || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x) <= 4))))
+ || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
return CC_ZNmode;
/* add.f for if (a+b) */
@@ -1135,31 +1135,33 @@ arc_init_reg_tables (void)
for (i = 0; i < NUM_MACHINE_MODES; i++)
{
- switch (GET_MODE_CLASS (i))
+ enum machine_mode m = (enum machine_mode) i;
+
+ switch (GET_MODE_CLASS (m))
{
case MODE_INT:
case MODE_PARTIAL_INT:
case MODE_COMPLEX_INT:
- if (GET_MODE_SIZE (i) <= 4)
+ if (GET_MODE_SIZE (m) <= 4)
arc_mode_class[i] = 1 << (int) S_MODE;
- else if (GET_MODE_SIZE (i) == 8)
+ else if (GET_MODE_SIZE (m) == 8)
arc_mode_class[i] = 1 << (int) D_MODE;
- else if (GET_MODE_SIZE (i) == 16)
+ else if (GET_MODE_SIZE (m) == 16)
arc_mode_class[i] = 1 << (int) T_MODE;
- else if (GET_MODE_SIZE (i) == 32)
+ else if (GET_MODE_SIZE (m) == 32)
arc_mode_class[i] = 1 << (int) O_MODE;
else
arc_mode_class[i] = 0;
break;
case MODE_FLOAT:
case MODE_COMPLEX_FLOAT:
- if (GET_MODE_SIZE (i) <= 4)
+ if (GET_MODE_SIZE (m) <= 4)
arc_mode_class[i] = 1 << (int) SF_MODE;
- else if (GET_MODE_SIZE (i) == 8)
+ else if (GET_MODE_SIZE (m) == 8)
arc_mode_class[i] = 1 << (int) DF_MODE;
- else if (GET_MODE_SIZE (i) == 16)
+ else if (GET_MODE_SIZE (m) == 16)
arc_mode_class[i] = 1 << (int) TF_MODE;
- else if (GET_MODE_SIZE (i) == 32)
+ else if (GET_MODE_SIZE (m) == 32)
arc_mode_class[i] = 1 << (int) OF_MODE;
else
arc_mode_class[i] = 0;
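This arc.c hunk (and the identical m32r.c hunk further down) converts the int loop index to an enum machine_mode value once and uses that everywhere: GET_MODE_CLASS and GET_MODE_SIZE conceptually take the enum, and the cast makes that explicit instead of relying on the raw int index. A minimal stand-alone C sketch of the idiom; the enum and size function here are hypothetical stand-ins, not GCC's:

#include <stdio.h>

enum mode_stub { STUB_QI, STUB_HI, STUB_SI, STUB_DI, STUB_MAX };

/* Takes the enum, like GET_MODE_SIZE takes enum machine_mode.  */
static int
stub_mode_size (enum mode_stub m)
{
  return 1 << (int) m;		/* 1, 2, 4, 8 bytes.  */
}

int
main (void)
{
  int i;

  for (i = 0; i < (int) STUB_MAX; i++)
    {
      /* Convert the index once, then use the enum value throughout,
	 mirroring the arc.c/m32r.c change.  */
      enum mode_stub m = (enum mode_stub) i;
      printf ("mode %d: %d bytes\n", i, stub_mode_size (m));
    }
  return 0;
}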
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 2deb9e77e13..1e98db97095 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -340,9 +340,9 @@ Pass -marclinux_prof option through to linker.
;; lra is still unproven for ARC, so allow to fall back to reload with -mno-lra.
;Target InverseMask(NO_LRA)
-mlra
; lra still won't allow to configure libgcc; see PR rtl-optimization/55464.
; so don't enable by default.
+mlra
Target Mask(LRA)
Enable lra
diff --git a/gcc/config/i386/sol2-bi.h b/gcc/config/i386/sol2-bi.h
deleted file mode 100644
index 66d17801f03..00000000000
--- a/gcc/config/i386/sol2-bi.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* Definitions of target machine for GCC, for bi-arch Solaris 2/x86.
- Copyright (C) 2004-2014 Free Software Foundation, Inc.
- Contributed by CodeSourcery, LLC.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-/* Override i386/sol2.h version: return 8-byte vectors in MMX registers if
- possible, matching Sun Studio 12 Update 1+ compilers and other x86
- targets. */
-#undef TARGET_SUBTARGET_DEFAULT
-#define TARGET_SUBTARGET_DEFAULT \
- (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS)
-
-#define SUBTARGET_OPTIMIZATION_OPTIONS \
- { OPT_LEVELS_1_PLUS, OPT_momit_leaf_frame_pointer, NULL, 1 }
-
-/* GNU as understands --32 and --64, but the native Solaris
- assembler requires -xarch=generic or -xarch=generic64 instead. */
-#ifdef USE_GAS
-#define ASM_CPU32_DEFAULT_SPEC "--32"
-#define ASM_CPU64_DEFAULT_SPEC "--64"
-#else
-#define ASM_CPU32_DEFAULT_SPEC "-xarch=generic"
-#define ASM_CPU64_DEFAULT_SPEC "-xarch=generic64"
-#endif
-
-#undef ASM_CPU_SPEC
-#define ASM_CPU_SPEC "%(asm_cpu_default)"
-
-/* Don't let i386/x86-64.h override i386/sol2.h version. Still cannot use
- -K PIC with the Solaris 10+ assembler, it gives many warnings:
- Absolute relocation is used for symbol "<symbol>" */
-#undef ASM_SPEC
-#define ASM_SPEC ASM_SPEC_BASE
-
-/* We do not need to search a special directory for startup files. */
-#undef MD_STARTFILE_PREFIX
-
-#define DEFAULT_ARCH32_P !TARGET_64BIT_DEFAULT
-
-#define ARCH64_SUBDIR "amd64"
-
-#ifdef USE_GLD
-/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly
- follow the Solaris 2 ABI. Prefer them if present. */
-#ifdef HAVE_LD_SOL2_EMULATION
-#define ARCH32_EMULATION "elf_i386_sol2"
-#define ARCH64_EMULATION "elf_x86_64_sol2"
-#else
-#define ARCH32_EMULATION "elf_i386"
-#define ARCH64_EMULATION "elf_x86_64"
-#endif
-#endif
-
-#undef ASM_COMMENT_START
-#define ASM_COMMENT_START "/"
-
-/* The native Solaris assembler can't calculate the difference between
- symbols in different sections, which causes problems for -fPIC jump
- tables in .rodata. */
-#ifndef HAVE_AS_IX86_DIFF_SECT_DELTA
-#undef JUMP_TABLES_IN_TEXT_SECTION
-#define JUMP_TABLES_IN_TEXT_SECTION 1
-
-/* The native Solaris assembler cannot handle the SYMBOL-. syntax, but
- requires SYMBOL@rel/@rel64 instead. */
-#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
- do { \
- fputs (integer_asm_op (SIZE, FALSE), FILE); \
- assemble_name (FILE, LABEL); \
- fputs (SIZE == 8 ? "@rel64" : "@rel", FILE); \
- } while (0)
-#endif
-
-/* As in sol2.h, override the default from i386/x86-64.h to work around
- Sun as TLS bug. */
-#undef ASM_OUTPUT_ALIGNED_COMMON
-#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
- do \
- { \
- if (TARGET_SUN_TLS \
- && in_section \
- && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \
- switch_to_section (bss_section); \
- x86_elf_aligned_common (FILE, NAME, SIZE, ALIGN); \
- } \
- while (0)
-
-#define USE_IX86_FRAME_POINTER 1
-#define USE_X86_64_FRAME_POINTER 1
-
-#undef NO_PROFILE_COUNTERS
-
-#undef MCOUNT_NAME
-#define MCOUNT_NAME "_mcount"
diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h
index 6676941f67a..9c3a6f49662 100644
--- a/gcc/config/i386/sol2.h
+++ b/gcc/config/i386/sol2.h
@@ -18,12 +18,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* Augment i386/unix.h version to return 8-byte vectors in memory, matching
- Sun Studio compilers until version 12, the only ones supported on
- Solaris 9. */
-#undef TARGET_SUBTARGET_DEFAULT
-#define TARGET_SUBTARGET_DEFAULT \
- (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_VECT8_RETURNS)
+#define SUBTARGET_OPTIMIZATION_OPTIONS \
+ { OPT_LEVELS_1_PLUS, OPT_momit_leaf_frame_pointer, NULL, 1 }
/* Old versions of the Solaris assembler can not handle the difference of
labels in different sections, so force DW_EH_PE_datarel if so. */
@@ -50,27 +46,46 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_SUN_TLS
#define TARGET_SUN_TLS 1
-#undef SIZE_TYPE
-#define SIZE_TYPE "unsigned int"
-
-#undef PTRDIFF_TYPE
-#define PTRDIFF_TYPE "int"
-
/* Solaris 2/Intel as chokes on #line directives before Solaris 10. */
#undef CPP_SPEC
#define CPP_SPEC "%{,assembler-with-cpp:-P} %(cpp_subtarget)"
-#define ASM_CPU_DEFAULT_SPEC ""
+/* GNU as understands --32 and --64, but the native Solaris
+ assembler requires -xarch=generic or -xarch=generic64 instead. */
+#ifdef USE_GAS
+#define ASM_CPU32_DEFAULT_SPEC "--32"
+#define ASM_CPU64_DEFAULT_SPEC "--64"
+#else
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=generic"
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=generic64"
+#endif
+
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC "%(asm_cpu_default)"
-#define ASM_CPU_SPEC ""
-
-/* Don't include ASM_PIC_SPEC. While the Solaris 9 assembler accepts
- -K PIC, it gives many warnings:
- R_386_32 relocation is used for symbol "<symbol>"
+/* Don't include ASM_PIC_SPEC. While the Solaris 10+ assembler accepts -K PIC,
+ it gives many warnings:
+ Absolute relocation is used for symbol "<symbol>"
GNU as doesn't recognize -K at all. */
#undef ASM_SPEC
#define ASM_SPEC ASM_SPEC_BASE
+#define DEFAULT_ARCH32_P !TARGET_64BIT_DEFAULT
+
+#define ARCH64_SUBDIR "amd64"
+
+#ifdef USE_GLD
+/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly
+ follow the Solaris 2 ABI. Prefer them if present. */
+#ifdef HAVE_LD_SOL2_EMULATION
+#define ARCH32_EMULATION "elf_i386_sol2"
+#define ARCH64_EMULATION "elf_x86_64_sol2"
+#else
+#define ARCH32_EMULATION "elf_i386"
+#define ARCH64_EMULATION "elf_x86_64"
+#endif
+#endif
+
#undef ENDFILE_SPEC
#define ENDFILE_SPEC \
"%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
@@ -84,23 +99,39 @@ along with GCC; see the file COPYING3. If not see
{ "asm_cpu", ASM_CPU_SPEC }, \
{ "asm_cpu_default", ASM_CPU_DEFAULT_SPEC }, \
-#undef SUBTARGET_EXTRA_SPECS
-#define SUBTARGET_EXTRA_SPECS \
- { "startfile_arch", STARTFILE_ARCH_SPEC }, \
- { "link_arch", LINK_ARCH_SPEC }, \
- SUBTARGET_CPU_EXTRA_SPECS
-
/* Register the Solaris-specific #pragma directives. */
#define REGISTER_SUBTARGET_PRAGMAS() solaris_register_pragmas ()
#undef LOCAL_LABEL_PREFIX
#define LOCAL_LABEL_PREFIX "."
+/* The Solaris 10 FCS as doesn't accept "#" comments, while later versions
+ do. */
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "/"
+
/* The 32-bit Solaris assembler does not support .quad. Do not use it. */
#ifndef HAVE_AS_IX86_QUAD
#undef ASM_QUAD
#endif
+/* The native Solaris assembler can't calculate the difference between
+ symbols in different sections, which causes problems for -fPIC jump
+ tables in .rodata. */
+#ifndef HAVE_AS_IX86_DIFF_SECT_DELTA
+#undef JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* The native Solaris assembler cannot handle the SYMBOL-. syntax, but
+ requires SYMBOL@rel/@rel64 instead. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ assemble_name (FILE, LABEL); \
+ fputs (SIZE == 8 ? "@rel64" : "@rel", FILE); \
+ } while (0)
+#endif
+
/* The Solaris assembler wants a .local for non-exported aliases. */
#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \
do { \
@@ -148,6 +179,20 @@ along with GCC; see the file COPYING3. If not see
while (0)
#endif /* !USE_GAS */
+/* As in sparc/sol2.h, override the default from i386/x86-64.h to work
+ around Sun as TLS bug. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (TARGET_SUN_TLS \
+ && in_section \
+ && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \
+ switch_to_section (bss_section); \
+ x86_elf_aligned_common (FILE, NAME, SIZE, ALIGN); \
+ } \
+ while (0)
+
/* Output a simple call for .init/.fini. */
#define ASM_OUTPUT_CALL(FILE, FN) \
do \
@@ -174,6 +219,14 @@ along with GCC; see the file COPYING3. If not see
#define DTORS_SECTION_ASM_OP "\t.section\t.dtors, \"aw\""
#endif
+#define USE_IX86_FRAME_POINTER 1
+#define USE_X86_64_FRAME_POINTER 1
+
+#undef NO_PROFILE_COUNTERS
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME "_mcount"
+
/* We do not need NT_VERSION notes. */
#undef X86_FILE_START_VERSION_DIRECTIVE
#define X86_FILE_START_VERSION_DIRECTIVE false
diff --git a/gcc/config/i386/t-sol2-64 b/gcc/config/i386/t-sol2
index 4e70f0bed27..4e70f0bed27 100644
--- a/gcc/config/i386/t-sol2-64
+++ b/gcc/config/i386/t-sol2
diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c
index 57cfb20ee16..837c22b2d77 100644
--- a/gcc/config/m32c/m32c.c
+++ b/gcc/config/m32c/m32c.c
@@ -3159,7 +3159,7 @@ m32c_illegal_subreg_p (rtx op)
{
int offset;
unsigned int i;
- int src_mode, dest_mode;
+ enum machine_mode src_mode, dest_mode;
if (GET_CODE (op) == MEM
&& ! m32c_legitimate_address_p (Pmode, XEXP (op, 0), false))
diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c
index 83bc3a7bf3a..2b84b0ff1eb 100644
--- a/gcc/config/m32r/m32r.c
+++ b/gcc/config/m32r/m32r.c
@@ -282,31 +282,33 @@ init_reg_tables (void)
for (i = 0; i < NUM_MACHINE_MODES; i++)
{
- switch (GET_MODE_CLASS (i))
+ enum machine_mode m = (enum machine_mode) i;
+
+ switch (GET_MODE_CLASS (m))
{
case MODE_INT:
case MODE_PARTIAL_INT:
case MODE_COMPLEX_INT:
- if (GET_MODE_SIZE (i) <= 4)
+ if (GET_MODE_SIZE (m) <= 4)
m32r_mode_class[i] = 1 << (int) S_MODE;
- else if (GET_MODE_SIZE (i) == 8)
+ else if (GET_MODE_SIZE (m) == 8)
m32r_mode_class[i] = 1 << (int) D_MODE;
- else if (GET_MODE_SIZE (i) == 16)
+ else if (GET_MODE_SIZE (m) == 16)
m32r_mode_class[i] = 1 << (int) T_MODE;
- else if (GET_MODE_SIZE (i) == 32)
+ else if (GET_MODE_SIZE (m) == 32)
m32r_mode_class[i] = 1 << (int) O_MODE;
else
m32r_mode_class[i] = 0;
break;
case MODE_FLOAT:
case MODE_COMPLEX_FLOAT:
- if (GET_MODE_SIZE (i) <= 4)
+ if (GET_MODE_SIZE (m) <= 4)
m32r_mode_class[i] = 1 << (int) SF_MODE;
- else if (GET_MODE_SIZE (i) == 8)
+ else if (GET_MODE_SIZE (m) == 8)
m32r_mode_class[i] = 1 << (int) DF_MODE;
- else if (GET_MODE_SIZE (i) == 16)
+ else if (GET_MODE_SIZE (m) == 16)
m32r_mode_class[i] = 1 << (int) TF_MODE;
- else if (GET_MODE_SIZE (i) == 32)
+ else if (GET_MODE_SIZE (m) == 32)
m32r_mode_class[i] = 1 << (int) OF_MODE;
else
m32r_mode_class[i] = 0;
diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c
index a637e27d41b..1ec96526efd 100644
--- a/gcc/config/msp430/msp430.c
+++ b/gcc/config/msp430/msp430.c
@@ -2162,7 +2162,7 @@ msp430_print_operand (FILE * file, rtx op, int letter)
because builtins are expanded before the frame layout is determined. */
fprintf (file, "%d",
msp430_initial_elimination_offset (ARG_POINTER_REGNUM, STACK_POINTER_REGNUM)
- - 2);
+ - (TARGET_LARGE ? 4 : 2));
return;
case 'J':
diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md
index 5d930c37901..74a98b48019 100644
--- a/gcc/config/msp430/msp430.md
+++ b/gcc/config/msp430/msp430.md
@@ -1321,7 +1321,7 @@
[(set (match_operand:SI 0 "register_operand" "=r")
(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%0"))
(sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
- "optimize > 2 && msp430_hwmult_type != NONE && ! msp430_is_interrupt_func ()"
+ "optimize > 2 && msp430_hwmult_type != NONE"
"*
if (msp430_use_f5_series_hwmult ())
return \"PUSH.W sr { DINT { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\";
@@ -1365,6 +1365,6 @@
if (msp430_use_f5_series_hwmult ())
return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\";
else
- return \"PUSH.W sr { DINT { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0141 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\";
"
)
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 1616b888c9c..47050c3d03e 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -629,14 +629,14 @@
(match_test "offsettable_nonstrict_memref_p (op)")))
;; Return 1 if the operand is suitable for load/store quad memory.
-;; This predicate only checks for non-atomic loads/stores.
+;; This predicate only checks for non-atomic loads/stores (not lqarx/stqcx).
(define_predicate "quad_memory_operand"
(match_code "mem")
{
rtx addr, op0, op1;
int ret;
- if (!TARGET_QUAD_MEMORY)
+ if (!TARGET_QUAD_MEMORY && !TARGET_SYNC_TI)
ret = 0;
else if (!memory_operand (op, mode))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 937eabf3727..f6da9b3a382 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -134,6 +134,7 @@
UNSPEC_DIVEUO
UNSPEC_UNPACK_128BIT
UNSPEC_PACK_128BIT
+ UNSPEC_LSQ
])
;;
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
index 7db439074cd..63152ed04d2 100644
--- a/gcc/config/rs6000/sync.md
+++ b/gcc/config/rs6000/sync.md
@@ -107,10 +107,17 @@
"isync"
[(set_attr "type" "isync")])
+;; Types that we should provide atomic instructions for.
+(define_mode_iterator AINT [QI
+ HI
+ SI
+ (DI "TARGET_POWERPC64")
+ (TI "TARGET_SYNC_TI")])
+
;; The control dependency used for load dependency described
;; in B.2.3 of the Power ISA 2.06B.
(define_insn "loadsync_<mode>"
- [(unspec_volatile:BLK [(match_operand:INT1 0 "register_operand" "r")]
+ [(unspec_volatile:BLK [(match_operand:AINT 0 "register_operand" "r")]
UNSPECV_ISYNC)
(clobber (match_scratch:CC 1 "=y"))]
""
@@ -118,18 +125,56 @@
[(set_attr "type" "isync")
(set_attr "length" "12")])
+(define_insn "load_quadpti"
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+ (unspec:PTI
+ [(match_operand:TI 1 "quad_memory_operand" "wQ")] UNSPEC_LSQ))]
+ "TARGET_SYNC_TI
+ && !reg_mentioned_p (operands[0], operands[1])"
+ "lq %0,%1"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
(define_expand "atomic_load<mode>"
- [(set (match_operand:INT1 0 "register_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "")) ;; memory
+ [(set (match_operand:AINT 0 "register_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "")) ;; memory
(use (match_operand:SI 2 "const_int_operand" ""))] ;; model
""
{
+ if (<MODE>mode == TImode && !TARGET_SYNC_TI)
+ FAIL;
+
enum memmodel model = (enum memmodel) INTVAL (operands[2]);
if (model == MEMMODEL_SEQ_CST)
emit_insn (gen_hwsync ());
- emit_move_insn (operands[0], operands[1]);
+ if (<MODE>mode != TImode)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ {
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx pti_reg = gen_reg_rtx (PTImode);
+
+ // Can't have indexed address for 'lq'
+ if (indexed_address (XEXP (op1, 0), TImode))
+ {
+ rtx old_addr = XEXP (op1, 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[1] = op1 = replace_equiv_address (op1, new_addr);
+ }
+
+ emit_insn (gen_load_quadpti (pti_reg, op1));
+
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (op0, gen_lowpart (TImode, pti_reg));
+ else
+ {
+ emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti_reg));
+ emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti_reg));
+ }
+ }
switch (model)
{
@@ -146,12 +191,24 @@
DONE;
})
+(define_insn "store_quadpti"
+ [(set (match_operand:PTI 0 "quad_memory_operand" "=wQ")
+ (unspec:PTI
+ [(match_operand:PTI 1 "quad_int_reg_operand" "r")] UNSPEC_LSQ))]
+ "TARGET_SYNC_TI"
+ "stq %1,%0"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
(define_expand "atomic_store<mode>"
- [(set (match_operand:INT1 0 "memory_operand" "") ;; memory
- (match_operand:INT1 1 "register_operand" "")) ;; input
+ [(set (match_operand:AINT 0 "memory_operand" "") ;; memory
+ (match_operand:AINT 1 "register_operand" "")) ;; input
(use (match_operand:SI 2 "const_int_operand" ""))] ;; model
""
{
+ if (<MODE>mode == TImode && !TARGET_SYNC_TI)
+ FAIL;
+
enum memmodel model = (enum memmodel) INTVAL (operands[2]);
switch (model)
{
@@ -166,7 +223,33 @@
default:
gcc_unreachable ();
}
- emit_move_insn (operands[0], operands[1]);
+ if (<MODE>mode != TImode)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ {
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx pti_reg = gen_reg_rtx (PTImode);
+
+ // Can't have indexed address for 'stq'
+ if (indexed_address (XEXP (op0, 0), TImode))
+ {
+ rtx old_addr = XEXP (op0, 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[0] = op0 = replace_equiv_address (op0, new_addr);
+ }
+
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (pti_reg, gen_lowpart (PTImode, op1));
+ else
+ {
+ emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op1));
+ emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op1));
+ }
+
+ emit_insn (gen_store_quadpti (gen_lowpart (PTImode, op0), pti_reg));
+ }
+
DONE;
})
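With the AINT iterator extended to TImode and the load_quadpti/store_quadpti patterns above, GCC's generic atomic builtins can operate on 16-byte objects via lq/stq. A minimal C sketch, assuming a powerpc64 target where TARGET_SYNC_TI holds (e.g. -mcpu=power8); on other targets the same builtins fall back to library calls:

#include <stdio.h>

typedef __int128 i128;

int
main (void)
{
  /* Static __int128 storage is 16-byte aligned, as lq/stq require.  */
  static i128 shared;
  i128 val = ((i128) 0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;
  i128 seen;

  __atomic_store (&shared, &val, __ATOMIC_SEQ_CST);	/* may expand to stq */
  __atomic_load (&shared, &seen, __ATOMIC_SEQ_CST);	/* may expand to lq  */

  printf ("%llx %llx\n", (unsigned long long) (seen >> 64),
	  (unsigned long long) seen);
  return 0;
}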
@@ -180,14 +263,6 @@
SI
(DI "TARGET_POWERPC64")])
-;; Types that we should provide atomic instructions for.
-
-(define_mode_iterator AINT [QI
- HI
- SI
- (DI "TARGET_POWERPC64")
- (TI "TARGET_SYNC_TI")])
-
(define_insn "load_locked<mode>"
[(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
(unspec_volatile:ATOMIC
diff --git a/gcc/config/sol2-10.h b/gcc/config/sol2-10.h
deleted file mode 100644
index 4488a40cba6..00000000000
--- a/gcc/config/sol2-10.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Operating system specific defines to be used when targeting GCC for any
- Solaris 2 system starting from Solaris 10.
- Copyright (C) 2006-2014 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-/* Solaris 10 has the float and long double forms of math functions.
- We redefine this hook so the version from elfos.h header won't be used. */
-#undef TARGET_LIBC_HAS_FUNCTION
-#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function
diff --git a/gcc/config/sol2-bi.h b/gcc/config/sol2-bi.h
deleted file mode 100644
index fdb2a28178c..00000000000
--- a/gcc/config/sol2-bi.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Definitions of target machine for GCC, for bi-arch Solaris 2.
- Copyright (C) 2011-2014 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3, or (at your option) any later
- version.
-
- GCC is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/* wchar_t is called differently in <wchar.h> for 32 and 64-bit
- compilations. This is called for by SCD 2.4.1, p. 6-83, Figure 6-65
- (32-bit) and p. 6P-10, Figure 6.38 (64-bit). */
-
-#undef WCHAR_TYPE
-#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int")
-
-#undef WCHAR_TYPE_SIZE
-#define WCHAR_TYPE_SIZE 32
-
-/* Same for wint_t. See SCD 2.4.1, p. 6-83, Figure 6-66 (32-bit). There's
- no corresponding 64-bit definition, but this is what Solaris 8
- <iso/wchar_iso.h> uses. */
-
-#undef WINT_TYPE
-#define WINT_TYPE (TARGET_64BIT ? "int" : "long int")
-
-#undef WINT_TYPE_SIZE
-#define WINT_TYPE_SIZE 32
-
-#if DEFAULT_ARCH32_P
-#define MULTILIB_DEFAULTS { "m32" }
-#else
-#define MULTILIB_DEFAULTS { "m64" }
-#endif
-
-#if DEFAULT_ARCH32_P
-#define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}"
-#define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}"
-#else
-#define DEF_ARCH32_SPEC(__str) "%{m32:" __str "}"
-#define DEF_ARCH64_SPEC(__str) "%{!m32:" __str "}"
-#endif
-
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC \
-(DEFAULT_ARCH32_P ? "\
-%{m64:" ASM_CPU64_DEFAULT_SPEC "} \
-%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \
-" : "\
-%{m32:" ASM_CPU32_DEFAULT_SPEC "} \
-%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \
-")
-
-/* This should be the same as LINK_ARCH32_SPEC_BASE, except with
- ARCH64_SUBDIR appended to the paths and /usr/ccs/lib is no longer
- necessary. */
-#undef LINK_ARCH64_SPEC_BASE
-#define LINK_ARCH64_SPEC_BASE \
- "%{G:-G} \
- %{YP,*} \
- %{R*} \
- %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/" ARCH64_SUBDIR ":%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "} \
- %{!p:%{!pg:-Y P,%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "}}}"
-
-#undef LINK_ARCH64_SPEC
-#ifndef USE_GLD
-/* FIXME: Used to be SPARC-only. Not SPARC-specific but for the model name! */
-#define LINK_ARCH64_SPEC \
- "%{mcmodel=medlow:-M /usr/lib/ld/" ARCH64_SUBDIR "/map.below4G} " \
- LINK_ARCH64_SPEC_BASE
-#else
-#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE
-#endif
-
-#ifdef USE_GLD
-#if DEFAULT_ARCH32_P
-#define ARCH_DEFAULT_EMULATION ARCH32_EMULATION
-#else
-#define ARCH_DEFAULT_EMULATION ARCH64_EMULATION
-#endif
-#define TARGET_LD_EMULATION "%{m32:-m " ARCH32_EMULATION "}" \
- "%{m64:-m " ARCH64_EMULATION "}" \
- "%{!m32:%{!m64:-m " ARCH_DEFAULT_EMULATION "}} "
-#else
-#define TARGET_LD_EMULATION ""
-#endif
-
-#undef LINK_ARCH_SPEC
-#if DISABLE_MULTILIB
-#if DEFAULT_ARCH32_P
-#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \
-%{m32:%(link_arch32)} \
-%{m64:%edoes not support multilib} \
-%{!m32:%{!m64:%(link_arch_default)}} \
-"
-#else
-#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \
-%{m32:%edoes not support multilib} \
-%{m64:%(link_arch64)} \
-%{!m32:%{!m64:%(link_arch_default)}} \
-"
-#endif
-#else
-#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \
-%{m32:%(link_arch32)} \
-%{m64:%(link_arch64)} \
-%{!m32:%{!m64:%(link_arch_default)}}"
-#endif
-
-#define LINK_ARCH_DEFAULT_SPEC \
-(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC)
-
-#undef SUBTARGET_EXTRA_SPECS
-#define SUBTARGET_EXTRA_SPECS \
- { "startfile_arch", STARTFILE_ARCH_SPEC }, \
- { "link_arch32", LINK_ARCH32_SPEC }, \
- { "link_arch64", LINK_ARCH64_SPEC }, \
- { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \
- { "link_arch", LINK_ARCH_SPEC }, \
- SUBTARGET_CPU_EXTRA_SPECS
diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h
index a21c953b035..d88de37bd2e 100644
--- a/gcc/config/sol2.h
+++ b/gcc/config/sol2.h
@@ -21,20 +21,25 @@ along with GCC; see the file COPYING3. If not see
/* We are compiling for Solaris 2 now. */
#define TARGET_SOLARIS 1
-/* Solaris 2 (at least as of 2.5.1) uses a 32-bit wchar_t. */
+/* wchar_t is called differently in <wchar.h> for 32 and 64-bit
+ compilations. This is called for by SCD 2.4.1, p. 6-83, Figure 6-65
+ (32-bit) and p. 6P-10, Figure 6.38 (64-bit). */
+
#undef WCHAR_TYPE
-#define WCHAR_TYPE "long int"
+#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int")
#undef WCHAR_TYPE_SIZE
-#define WCHAR_TYPE_SIZE BITS_PER_WORD
+#define WCHAR_TYPE_SIZE 32
+
+/* Same for wint_t. See SCD 2.4.1, p. 6-83, Figure 6-66 (32-bit). There's
+ no corresponding 64-bit definition, but this is what Solaris 8
+ <iso/wchar_iso.h> uses. */
-/* Solaris 2 uses a wint_t different from the default. This is required
- by the SCD 2.4.1, p. 6-83, Figure 6-66. */
-#undef WINT_TYPE
-#define WINT_TYPE "long int"
+#undef WINT_TYPE
+#define WINT_TYPE (TARGET_64BIT ? "int" : "long int")
-#undef WINT_TYPE_SIZE
-#define WINT_TYPE_SIZE BITS_PER_WORD
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE 32
#define SIG_ATOMIC_TYPE "int"
@@ -103,6 +108,19 @@ along with GCC; see the file COPYING3. If not see
solaris_override_options (); \
} while (0)
+#if DEFAULT_ARCH32_P
+#define MULTILIB_DEFAULTS { "m32" }
+#else
+#define MULTILIB_DEFAULTS { "m64" }
+#endif
+
+#if DEFAULT_ARCH32_P
+#define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}"
+#else
+#define DEF_ARCH32_SPEC(__str) "%{m32:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{!m32:" __str "}"
+#endif
/* It's safe to pass -s always, even if -g is not used. Those options are
handled by both Sun as and GNU as. */
@@ -111,6 +129,16 @@ along with GCC; see the file COPYING3. If not see
#define ASM_PIC_SPEC " %{fpic|fpie|fPIC|fPIE:-K PIC}"
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" ASM_CPU64_DEFAULT_SPEC "} \
+%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" ASM_CPU32_DEFAULT_SPEC "} \
+%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \
+")
+
#undef LIB_SPEC
#define LIB_SPEC \
"%{!symbolic:\
@@ -120,17 +148,11 @@ along with GCC; see the file COPYING3. If not see
#ifndef CROSS_DIRECTORY_STRUCTURE
#undef MD_EXEC_PREFIX
#define MD_EXEC_PREFIX "/usr/ccs/bin/"
-
-#undef MD_STARTFILE_PREFIX
-#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
#endif
-#undef STARTFILE_ARCH32_SPEC
-#define STARTFILE_ARCH32_SPEC "%{ansi:values-Xc.o%s} \
- %{!ansi:values-Xa.o%s}"
-
#undef STARTFILE_ARCH_SPEC
-#define STARTFILE_ARCH_SPEC STARTFILE_ARCH32_SPEC
+#define STARTFILE_ARCH_SPEC "%{ansi:values-Xc.o%s} \
+ %{!ansi:values-Xa.o%s}"
/* We don't use the standard svr4 STARTFILE_SPEC because it's wrong for us. */
#undef STARTFILE_SPEC
@@ -153,14 +175,78 @@ along with GCC; see the file COPYING3. If not see
"%{G:-G} \
%{YP,*} \
%{R*} \
- %{!YP,*:%{p|pg:-Y P,%R/usr/ccs/lib/libp:%R/usr/lib/libp:%R/usr/ccs/lib:%R/lib:%R/usr/lib} \
- %{!p:%{!pg:-Y P,%R/usr/ccs/lib:%R/lib:%R/usr/lib}}}"
+ %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp:%R/lib:%R/usr/lib} \
+ %{!p:%{!pg:-Y P,%R/lib:%R/usr/lib}}}"
#undef LINK_ARCH32_SPEC
#define LINK_ARCH32_SPEC LINK_ARCH32_SPEC_BASE
+/* This should be the same as LINK_ARCH32_SPEC_BASE, except with
+ ARCH64_SUBDIR appended to the paths. */
+#undef LINK_ARCH64_SPEC_BASE
+#define LINK_ARCH64_SPEC_BASE \
+ "%{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/" ARCH64_SUBDIR ":%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "} \
+ %{!p:%{!pg:-Y P,%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "}}}"
+
+#undef LINK_ARCH64_SPEC
+#ifndef USE_GLD
+/* FIXME: Used to be SPARC-only. Not SPARC-specific but for the model name! */
+#define LINK_ARCH64_SPEC \
+ "%{mcmodel=medlow:-M /usr/lib/ld/" ARCH64_SUBDIR "/map.below4G} " \
+ LINK_ARCH64_SPEC_BASE
+#else
+#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE
+#endif
+
+#ifdef USE_GLD
+#if DEFAULT_ARCH32_P
+#define ARCH_DEFAULT_EMULATION ARCH32_EMULATION
+#else
+#define ARCH_DEFAULT_EMULATION ARCH64_EMULATION
+#endif
+#define TARGET_LD_EMULATION "%{m32:-m " ARCH32_EMULATION "}" \
+ "%{m64:-m " ARCH64_EMULATION "}" \
+ "%{!m32:%{!m64:-m " ARCH_DEFAULT_EMULATION "}} "
+#else
+#define TARGET_LD_EMULATION ""
+#endif
+
#undef LINK_ARCH_SPEC
-#define LINK_ARCH_SPEC LINK_ARCH32_SPEC
+#if DISABLE_MULTILIB
+#if DEFAULT_ARCH32_P
+#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \
+%{m32:%(link_arch32)} \
+%{m64:%edoes not support multilib} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#else
+#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \
+%{m32:%edoes not support multilib} \
+%{m64:%(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#endif
+#else
+#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \
+%{m32:%(link_arch32)} \
+%{m64:%(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}}"
+#endif
+
+#define LINK_ARCH_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC)
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "startfile_arch", STARTFILE_ARCH_SPEC }, \
+ { "link_arch32", LINK_ARCH32_SPEC }, \
+ { "link_arch64", LINK_ARCH64_SPEC }, \
+ { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC }, \
+ SUBTARGET_CPU_EXTRA_SPECS
/* C++11 programs need -lrt for nanosleep. */
#define TIME_LIBRARY "rt"
@@ -235,7 +321,9 @@ along with GCC; see the file COPYING3. If not see
#define TARGET_CXX_DECL_MANGLING_CONTEXT solaris_cxx_decl_mangling_context
/* Solaris/x86 as and gas support unquoted section names. */
+#ifndef SECTION_NAME_FORMAT
#define SECTION_NAME_FORMAT "%s"
+#endif
/* This is how to declare the size of a function. For Solaris, we output
any .init or .fini entries here. */
@@ -249,23 +337,6 @@ along with GCC; see the file COPYING3. If not see
} \
while (0)
-/* Solaris as has a bug: a .common directive in .tbss or .tdata section
- behaves as .tls_common rather than normal non-TLS .common. */
-#undef ASM_OUTPUT_ALIGNED_COMMON
-#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
- do \
- { \
- if (TARGET_SUN_TLS \
- && in_section \
- && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \
- switch_to_section (bss_section); \
- fprintf ((FILE), "%s", COMMON_ASM_OP); \
- assemble_name ((FILE), (NAME)); \
- fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
- (SIZE), (ALIGN) / BITS_PER_UNIT); \
- } \
- while (0)
-
#ifndef USE_GAS
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY solaris_assemble_visibility
@@ -291,8 +362,10 @@ along with GCC; see the file COPYING3. If not see
#define TARGET_POSIX_IO
+/* Solaris 10 has the float and long double forms of math functions.
+ We redefine this hook so the version from elfos.h header won't be used. */
#undef TARGET_LIBC_HAS_FUNCTION
-#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function
+#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function
extern GTY(()) tree solaris_pending_aligns;
extern GTY(()) tree solaris_pending_inits;
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h
index b50a937b26f..2fcdc85c81f 100644
--- a/gcc/config/sparc/sol2.h
+++ b/gcc/config/sparc/sol2.h
@@ -109,8 +109,6 @@ along with GCC; see the file COPYING3. If not see
#define CPP_CPU64_DEFAULT_SPEC ""
#undef ASM_CPU32_DEFAULT_SPEC
#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus"
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
@@ -120,8 +118,6 @@ along with GCC; see the file COPYING3. If not see
#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusa"
#undef ASM_CPU64_DEFAULT_SPEC
#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9a"
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
@@ -131,8 +127,6 @@ along with GCC; see the file COPYING3. If not see
#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
#undef ASM_CPU64_DEFAULT_SPEC
#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b"
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara
@@ -142,8 +136,6 @@ along with GCC; see the file COPYING3. If not see
#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
#undef ASM_CPU64_DEFAULT_SPEC
#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b"
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2
@@ -153,8 +145,6 @@ along with GCC; see the file COPYING3. If not see
#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
#undef ASM_CPU64_DEFAULT_SPEC
#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b"
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara3
@@ -164,8 +154,6 @@ along with GCC; see the file COPYING3. If not see
#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus" AS_NIAGARA3_FLAG
#undef ASM_CPU64_DEFAULT_SPEC
#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9" AS_NIAGARA3_FLAG
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
#endif
#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara4
@@ -175,8 +163,6 @@ along with GCC; see the file COPYING3. If not see
#define ASM_CPU32_DEFAULT_SPEC AS_SPARC32_FLAG AS_NIAGARA4_FLAG
#undef ASM_CPU64_DEFAULT_SPEC
#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG AS_NIAGARA4_FLAG
-#undef ASM_CPU_DEFAULT_SPEC
-#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
#endif
#undef CPP_CPU_SPEC
@@ -361,6 +347,23 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
} \
while (0)
+/* Solaris as has a bug: a .common directive in .tbss or .tdata section
+ behaves as .tls_common rather than normal non-TLS .common. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (TARGET_SUN_TLS \
+ && in_section \
+ && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \
+ switch_to_section (bss_section); \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ } \
+ while (0)
+
#ifndef USE_GAS
/* This is how to output an assembler line that says to advance
the location counter to a multiple of 2**LOG bytes using the
@@ -376,7 +379,6 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
/* Sun as requires doublequoted section names on SPARC. While GNU as
supports that, too, we prefer the standard variant. */
-#undef SECTION_NAME_FORMAT
#define SECTION_NAME_FORMAT "\"%s\""
#endif /* !USE_GAS */
diff --git a/gcc/config/sparc/t-sol2-64 b/gcc/config/sparc/t-sol2
index ec7e4eba6fd..ec7e4eba6fd 100644
--- a/gcc/config/sparc/t-sol2-64
+++ b/gcc/config/sparc/t-sol2