author | mrs <mrs@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-04-30 17:19:02 +0000
---|---|---
committer | mrs <mrs@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-04-30 17:19:02 +0000
commit | 951019127b9f3cf59989ceb74539d454d535d3b5 (patch) |
tree | 958443e6076f37a3ff7d07d9303f5c4171944638 /gcc/config |
parent | bedbed50ff969478b0c8878678d7c169a8a0cdaf (diff) |
parent | 8a21d39cf1ea8183eb1e95bbb4396d97ae0dc36d (diff) |
download | gcc-951019127b9f3cf59989ceb74539d454d535d3b5.tar.gz |
Merge in trunk.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/wide-int@209944 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
26 files changed, 1590 insertions, 1437 deletions
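As a reading aid for the AArch64 changes below (this sketch is not part of the commit): the merged trunk work adds builtin-backed vst2/vst3/vst4 lane-store intrinsics, wired up through new `st[234]_lane` expanders, and rewrites the vuzp/vzip intrinsics in terms of `__builtin_shuffle`. A minimal use of one of the new q-register store intrinsics might look like this; the function name `store_lane3_pair` is hypothetical, only `vst2q_lane_s16` comes from the patch:

```c
#include <arm_neon.h>

/* Hypothetical example: store lane 3 of each of the two vectors in V,
   interleaved, so dst[0] = v.val[0][3] and dst[1] = v.val[1][3].
   The lane index must be a compile-time constant.  */
void
store_lane3_pair (int16_t *dst, int16x8x2_t v)
{
  vst2q_lane_s16 (dst, v, 3);
}
```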
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4616ad24c07..a3019828a93 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -246,6 +246,11 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_void, qualifier_pointer_map_mode,
+      qualifier_none, qualifier_none };
+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
 
 #define CF0(N, X) CODE_FOR_aarch64_##N##X
 #define CF1(N, X) CODE_FOR_##N##X##1
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 5542f023b33..04cbc780da2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -175,6 +175,8 @@ bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
 bool aarch64_is_long_call_p (rtx);
 bool aarch64_label_mentioned_p (rtx);
 bool aarch64_legitimate_pic_operand_p (rtx);
+bool aarch64_modes_tieable_p (enum machine_mode mode1,
+			      enum machine_mode mode2);
 bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
 bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
			     enum machine_mode);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index fa332ae5948..339e8f86a4b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -118,6 +118,10 @@
   BUILTIN_VQ (STORESTRUCT, st3, 0)
   BUILTIN_VQ (STORESTRUCT, st4, 0)
 
+  BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0)
+  BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0)
+  BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0)
+
   BUILTIN_VQW (BINOP, saddl2, 0)
   BUILTIN_VQW (BINOP, uaddl2, 0)
   BUILTIN_VQW (BINOP, ssubl2, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c05767b2045..108bc8d8893 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3686,6 +3686,17 @@
   [(set_attr "type" "neon_store2_2reg<q>")]
 )
 
+(define_insn "vec_store_lanesoi_lane<mode>"
+  [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
+			      (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+			      (match_operand:SI 2 "immediate_operand" "i")]
+			     UNSPEC_ST2_LANE))]
+  "TARGET_SIMD"
+  "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
 (define_insn "vec_load_lanesci<mode>"
   [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
@@ -3706,6 +3717,17 @@
   [(set_attr "type" "neon_store3_3reg<q>")]
 )
 
+(define_insn "vec_store_lanesci_lane<mode>"
+  [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
+			       (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+			       (match_operand:SI 2 "immediate_operand" "i")]
+			      UNSPEC_ST3_LANE))]
+  "TARGET_SIMD"
+  "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
 (define_insn "vec_load_lanesxi<mode>"
   [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
@@ -3726,6 +3748,17 @@
   [(set_attr "type" "neon_store4_4reg<q>")]
 )
 
+(define_insn "vec_store_lanesxi_lane<mode>"
+  [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
+			      (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+			      (match_operand:SI 2 "immediate_operand" "i")]
+			     UNSPEC_ST4_LANE))]
+  "TARGET_SIMD"
+  "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store4_one_lane<q>")]
+)
+
 ;; Reload patterns for AdvSIMD register list operands.
 
 (define_expand "mov<mode>"
@@ -4220,6 +4253,57 @@
   DONE;
 })
 
+(define_expand "aarch64_st2_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:OI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_TWO_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
+						  operands[1],
+						  operands[2]));
+  DONE;
+})
+
+(define_expand "aarch64_st3_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:CI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_THREE_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
+						  operands[1],
+						  operands[2]));
+  DONE;
+})
+
+(define_expand "aarch64_st4_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:XI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_FOUR_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
+						  operands[1],
+						  operands[2]));
+  DONE;
+})
+
 (define_expand "aarch64_st1<VALL:mode>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:VALL 1 "register_operand")]
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1d48108516d..d3d7d1e60d6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8316,7 +8316,8 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
      2. Scalar to Scalar for integer modes or same size float modes.
-     3. Vector to Vector modes.  */
+     3. Vector to Vector modes.
+     4. On little-endian only, Vector-Structure to Vector modes.  */
   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
     {
       if (aarch64_vector_mode_supported_p (from)
@@ -8332,11 +8333,41 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
       if (aarch64_vector_mode_supported_p (from)
	  && aarch64_vector_mode_supported_p (to))
	return false;
+
+      /* Within a vector structure straddling multiple vector registers
+	 we are in a mixed-endian representation.  As such, we can't
+	 easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
+	 switch between vectors and vector structures cheaply.  */
+      if (!BYTES_BIG_ENDIAN)
+	if ((aarch64_vector_mode_supported_p (from)
+	      && aarch64_vect_struct_mode_p (to))
+	    || (aarch64_vector_mode_supported_p (to)
+	      && aarch64_vect_struct_mode_p (from)))
+	  return false;
     }
 
   return true;
 }
 
+/* Implement MODES_TIEABLE_P.  */
+
+bool
+aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
+    return true;
+
+  /* We specifically want to allow elements of "structure" modes to
+     be tieable to the structure.  This more general condition allows
+     other rarer situations too.  */
+  if (TARGET_SIMD
+      && aarch64_vector_mode_p (mode1)
+      && aarch64_vector_mode_p (mode2))
+    return true;
+
+  return false;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index e2b6c8e2908..c9b30d01865 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -365,8 +365,7 @@ extern unsigned long aarch64_tune_flags;
 #define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE)
 
-#define MODES_TIEABLE_P(MODE1, MODE2) \
-  (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
+#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2)
 
 #define DWARF2_UNWIND_INFO 1
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7965db4c9c7..266d7873a5a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -98,6 +98,9 @@
     UNSPEC_ST2
     UNSPEC_ST3
     UNSPEC_ST4
+    UNSPEC_ST2_LANE
+    UNSPEC_ST3_LANE
+    UNSPEC_ST4_LANE
     UNSPEC_TLS
     UNSPEC_TLSDESC
     UNSPEC_USHL_2S
@@ -2426,6 +2429,25 @@
 }
 )
 
+(define_expand "mov<mode>cc"
+  [(set (match_operand:GPF 0 "register_operand" "")
+	(if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "")
+			  (match_operand:GPF 2 "register_operand" "")
+			  (match_operand:GPF 3 "register_operand" "")))]
+  ""
+  {
+    rtx ccreg;
+    enum rtx_code code = GET_CODE (operands[1]);
+
+    if (code == UNEQ || code == LTGT)
+      FAIL;
+
+    ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
+				     XEXP (operands[1], 1));
+    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+  }
+)
+
 (define_insn "*csinc2<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
	(plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 9f1fa98e6fb..f6213ce2aea 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -13199,929 +13199,6 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
            : /* No clobbers */);
   return result;
 }
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vuzp1_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vuzp1_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly8x8_t result;
-  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vuzp1_p16 (poly16x4_t a, poly16x4_t b)
-{
-  poly16x4_t result;
-  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vuzp1_s8 (int8x8_t a, int8x8_t b)
-{
-  int8x8_t
result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vuzp1_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vuzp1_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vuzp1_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vuzp1_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vuzp1_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vuzp1q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vuzp1q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vuzp1q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vuzp1q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vuzp1q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vuzp1q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vuzp1q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vuzp1q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) -vuzp1q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vuzp1q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vuzp1q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vuzp1q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vuzp2_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vuzp2_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vuzp2_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vuzp2_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vuzp2_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vuzp2_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vuzp2_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vuzp2_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vuzp2_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vuzp2q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - 
return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vuzp2q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vuzp2q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vuzp2q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vuzp2q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vuzp2q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vuzp2q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vuzp2q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vuzp2q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vuzp2q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vuzp2q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vuzp2q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vzip1_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("zip1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vzip1_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("zip1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vzip1_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - 
__asm__ ("zip1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vzip1_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("zip1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vzip1_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("zip1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vzip1_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("zip1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vzip1_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("zip1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vzip1_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("zip1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vzip1_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("zip1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vzip1q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("zip1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vzip1q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("zip1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vzip1q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("zip1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vzip1q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("zip1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vzip1q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("zip1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vzip1q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("zip1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vzip1q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("zip1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) -vzip1q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("zip1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vzip1q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("zip1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vzip1q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("zip1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vzip1q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("zip1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vzip1q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("zip1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vzip2_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("zip2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vzip2_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("zip2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vzip2_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("zip2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vzip2_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("zip2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vzip2_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("zip2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vzip2_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("zip2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vzip2_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("zip2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vzip2_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("zip2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vzip2_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("zip2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return 
result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vzip2q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("zip2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vzip2q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("zip2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vzip2q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("zip2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vzip2q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("zip2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vzip2q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("zip2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vzip2q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("zip2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vzip2q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("zip2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vzip2q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("zip2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vzip2q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("zip2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vzip2q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("zip2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vzip2q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("zip2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vzip2q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("zip2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} /* End of temporary inline asm implementations. 
*/ @@ -14452,131 +13529,224 @@ __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) -#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ - intype b, const int c) \ - { \ - __ST2_LANE_STRUCTURE_##intype *__p = \ - (__ST2_LANE_STRUCTURE_##intype *)ptr; \ - __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ - "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*__p) \ - : "Q"(b), "i"(c) \ - : "v16", "v17"); \ - } - -__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) -__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) -__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) -__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) -__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) -__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) -__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) -__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) -__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) -__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) -__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) -__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) -__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) -__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) -__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) -__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) -__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) -__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) -__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) -__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) -__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) -__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) -__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) -__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) - -#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ - intype b, const int c) \ - { \ - __ST3_LANE_STRUCTURE_##intype *__p = \ - (__ST3_LANE_STRUCTURE_##intype *)ptr; \ - __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ - "st3 {v16." #lnsuffix " - v18." 
#lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*__p) \ - : "Q"(b), "i"(c) \ - : "v16", "v17", "v18"); \ - } - -__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) -__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) -__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) -__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) -__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) -__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) -__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) -__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) -__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) -__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) -__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) -__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) -__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) -__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) -__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) -__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) -__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) -__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) -__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) -__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) -__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) -__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) -__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) -__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) - -#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ - intype b, const int c) \ - { \ - __ST4_LANE_STRUCTURE_##intype *__p = \ - (__ST4_LANE_STRUCTURE_##intype *)ptr; \ - __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ - "st4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*__p) \ - : "Q"(b), "i"(c) \ - : "v16", "v17", "v18", "v19"); \ - } - -__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) -__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) -__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) -__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) -__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) -__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) -__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) -__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) -__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) -__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) -__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) -__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) -__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) -__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) -__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) -__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) -__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) -__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) -__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) -__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) -__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) -__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) -__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) -__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) +#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ + mode, ptr_mode, funcsuffix, signedtype) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_oi __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregoi##mode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregoi##mode (__o, \ + (signedtype) __temp.val[1], 1); \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ +} + +__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, + float32x4_t) +__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, + float64x2_t) +__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) +__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, + int16x8_t) +__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) +__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) +__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) +__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) +__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) +__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, + int16x8_t) +__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, + int32x4_t) +__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, + int64x2_t) + +#undef __ST2_LANE_FUNC +#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_oi __o; } 
__temp = { __b }; \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ +} + +__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) +__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) +__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) +__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) +__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) +__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) +__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) +__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) +__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) +__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) +__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) +__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) + +#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ + mode, ptr_mode, funcsuffix, signedtype) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_ci __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregci##mode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregci##mode (__o, \ + (signedtype) __temp.val[1], 1); \ + __o = __builtin_aarch64_set_qregci##mode (__o, \ + (signedtype) __temp.val[2], 2); \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ +} + +__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, + float32x4_t) +__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, + float64x2_t) +__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) +__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, + int16x8_t) +__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) +__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) +__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) +__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) +__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) +__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, + int16x8_t) +__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, + int32x4_t) +__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, + int64x2_t) + +#undef __ST3_LANE_FUNC +#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ +} + +__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) +__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) +__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) +__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) +__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) 
+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) +__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) +__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) +__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) +__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) +__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) +__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) + +#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ + mode, ptr_mode, funcsuffix, signedtype) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_xi __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[3] \ + = vcombine_##funcsuffix (__b.val[3], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[1], 1); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[2], 2); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[3], 3); \ + __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ +} + +__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, + float32x4_t) +__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64, + float64x2_t) +__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) +__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, + int16x8_t) +__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) +__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) +__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) +__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) +__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) +__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, + int16x8_t) +__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, + int32x4_t) +__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, + int64x2_t) + +#undef __ST4_LANE_FUNC +#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ + __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ +} + +__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) +__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) +__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) +__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) +__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) +__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) +__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) +__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) +__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) 
+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) +__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) +__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) __extension__ static __inline int64_t __attribute__ ((__always_inline__)) vaddlv_s32 (int32x2_t a) @@ -25614,10 +24784,880 @@ vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) /* vuzp */ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp1_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp1_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp1_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp1_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp1q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp1q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle 
(__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp1q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp1q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp1q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp1q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp2_f32 
(float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp2_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp2_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp2_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp2q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp2q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 
5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp2q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp2q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp2q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp2q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (uzp) /* vzip */ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip1_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip1_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 
6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vzip1_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip1_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip1_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip1_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip1_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip1_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip1_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vzip1q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip1q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + 
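The vuzp2 and vzip1 intrinsics above (and the vzip2 variants that follow) all use one pattern: a single __builtin_shuffle whose index vector is mirrored under __AARCH64EB__, because GCC numbers vector lanes from the opposite end on big-endian AArch64, so the same UZP2/ZIP1 instruction needs a reversed mask. A minimal illustrative C sketch of what the little-endian masks compute (not part of the patch; the wrapper names here are hypothetical):

#include <arm_neon.h>

/* vzip1 interleaves the low halves of its two arguments.  */
int8x8_t
zip_low (int8x8_t a, int8x8_t b)
{
  /* Result lanes: {a0, b0, a1, b1, a2, b2, a3, b3}.  */
  return vzip1_s8 (a, b);
}

/* vuzp2 concatenates the odd-numbered lanes of each argument.  */
int8x8_t
unzip_odd (int8x8_t a, int8x8_t b)
{
  /* Result lanes: {a1, a3, a5, a7, b1, b3, b5, b7}.  */
  return vuzp2_s8 (a, b);
}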
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip1q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vzip1q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip1q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip1q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip2_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip2_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vzip2_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ 
static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip2_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip2_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip2_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip2_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip2_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip2_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vzip2q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip2q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip2q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif +} + +__extension__ static __inline int16x8_t 
__attribute__ ((__always_inline__)) +vzip2q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip2q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip2q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (zip) #undef __INTERLEAVE_LIST diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index fd1eb482f0f..c537c3780ee 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -564,6 +564,32 @@ (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) +;; Mode of pair of elements for each vector mode, to define transfer +;; size for structure lane/dup loads and stores. +(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI") + (V4HI "SI") (V8HI "SI") + (V2SI "V2SI") (V4SI "V2SI") + (DI "V2DI") (V2DI "V2DI") + (V2SF "V2SF") (V4SF "V2SF") + (DF "V2DI") (V2DF "V2DI")]) + +;; Similar, for three elements. +(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK") + (V4HI "BLK") (V8HI "BLK") + (V2SI "BLK") (V4SI "BLK") + (DI "EI") (V2DI "EI") + (V2SF "BLK") (V4SF "BLK") + (DF "EI") (V2DF "EI")]) + +;; Similar, for four elements. 
+(define_mode_attr V_FOUR_ELEM [(V8QI "SI")   (V16QI "SI")
+			       (V4HI "V4HI") (V8HI "V4HI")
+			       (V2SI "V4SI") (V4SI "V4SI")
+			       (DI "OI")     (V2DI "OI")
+			       (V2SF "V4SF") (V4SF "V4SF")
+			       (DF "OI")     (V2DF "OI")])
+
+
 ;; Mode for atomic operation suffixes
 (define_mode_attr atomic_sfx
   [(QI "b") (HI "h") (SI "") (DI "")])
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 987a7c9a55a..58d95d832c5 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -996,7 +996,7 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
   if (GET_MODE_CLASS (mode) == MODE_INT && y == const0_rtx
       && (op == EQ || op == NE
-	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x) <= 4))))
+	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
     return CC_ZNmode;
 
   /* add.f for if (a+b) */
@@ -1135,31 +1135,33 @@ arc_init_reg_tables (void)
 
   for (i = 0; i < NUM_MACHINE_MODES; i++)
     {
-      switch (GET_MODE_CLASS (i))
+      enum machine_mode m = (enum machine_mode) i;
+
+      switch (GET_MODE_CLASS (m))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
-	  if (GET_MODE_SIZE (i) <= 4)
+	  if (GET_MODE_SIZE (m) <= 4)
	    arc_mode_class[i] = 1 << (int) S_MODE;
-	  else if (GET_MODE_SIZE (i) == 8)
+	  else if (GET_MODE_SIZE (m) == 8)
	    arc_mode_class[i] = 1 << (int) D_MODE;
-	  else if (GET_MODE_SIZE (i) == 16)
+	  else if (GET_MODE_SIZE (m) == 16)
	    arc_mode_class[i] = 1 << (int) T_MODE;
-	  else if (GET_MODE_SIZE (i) == 32)
+	  else if (GET_MODE_SIZE (m) == 32)
	    arc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    arc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
-	  if (GET_MODE_SIZE (i) <= 4)
+	  if (GET_MODE_SIZE (m) <= 4)
	    arc_mode_class[i] = 1 << (int) SF_MODE;
-	  else if (GET_MODE_SIZE (i) == 8)
+	  else if (GET_MODE_SIZE (m) == 8)
	    arc_mode_class[i] = 1 << (int) DF_MODE;
-	  else if (GET_MODE_SIZE (i) == 16)
+	  else if (GET_MODE_SIZE (m) == 16)
	    arc_mode_class[i] = 1 << (int) TF_MODE;
-	  else if (GET_MODE_SIZE (i) == 32)
+	  else if (GET_MODE_SIZE (m) == 32)
	    arc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    arc_mode_class[i] = 0;
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 2deb9e77e13..1e98db97095 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -340,9 +340,9 @@ Pass -marclinux_prof option through to linker.
 ;; lra is still unproven for ARC, so allow to fall back to reload with -mno-lra.
 ;Target InverseMask(NO_LRA)
-mlra
 ; lra still won't allow to configure libgcc; see PR rtl-optimization/55464.
 ; so don't enable by default.
+mlra
 Target Mask(LRA)
 Enable lra
diff --git a/gcc/config/i386/sol2-bi.h b/gcc/config/i386/sol2-bi.h
deleted file mode 100644
index 66d17801f03..00000000000
--- a/gcc/config/i386/sol2-bi.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* Definitions of target machine for GCC, for bi-arch Solaris 2/x86.
-   Copyright (C) 2004-2014 Free Software Foundation, Inc.
-   Contributed by CodeSourcery, LLC.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3.  If not see
-<http://www.gnu.org/licenses/>.
*/ - -/* Override i386/sol2.h version: return 8-byte vectors in MMX registers if - possible, matching Sun Studio 12 Update 1+ compilers and other x86 - targets. */ -#undef TARGET_SUBTARGET_DEFAULT -#define TARGET_SUBTARGET_DEFAULT \ - (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS) - -#define SUBTARGET_OPTIMIZATION_OPTIONS \ - { OPT_LEVELS_1_PLUS, OPT_momit_leaf_frame_pointer, NULL, 1 } - -/* GNU as understands --32 and --64, but the native Solaris - assembler requires -xarch=generic or -xarch=generic64 instead. */ -#ifdef USE_GAS -#define ASM_CPU32_DEFAULT_SPEC "--32" -#define ASM_CPU64_DEFAULT_SPEC "--64" -#else -#define ASM_CPU32_DEFAULT_SPEC "-xarch=generic" -#define ASM_CPU64_DEFAULT_SPEC "-xarch=generic64" -#endif - -#undef ASM_CPU_SPEC -#define ASM_CPU_SPEC "%(asm_cpu_default)" - -/* Don't let i386/x86-64.h override i386/sol2.h version. Still cannot use - -K PIC with the Solaris 10+ assembler, it gives many warnings: - Absolute relocation is used for symbol "<symbol>" */ -#undef ASM_SPEC -#define ASM_SPEC ASM_SPEC_BASE - -/* We do not need to search a special directory for startup files. */ -#undef MD_STARTFILE_PREFIX - -#define DEFAULT_ARCH32_P !TARGET_64BIT_DEFAULT - -#define ARCH64_SUBDIR "amd64" - -#ifdef USE_GLD -/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly - follow the Solaris 2 ABI. Prefer them if present. */ -#ifdef HAVE_LD_SOL2_EMULATION -#define ARCH32_EMULATION "elf_i386_sol2" -#define ARCH64_EMULATION "elf_x86_64_sol2" -#else -#define ARCH32_EMULATION "elf_i386" -#define ARCH64_EMULATION "elf_x86_64" -#endif -#endif - -#undef ASM_COMMENT_START -#define ASM_COMMENT_START "/" - -/* The native Solaris assembler can't calculate the difference between - symbols in different sections, which causes problems for -fPIC jump - tables in .rodata. */ -#ifndef HAVE_AS_IX86_DIFF_SECT_DELTA -#undef JUMP_TABLES_IN_TEXT_SECTION -#define JUMP_TABLES_IN_TEXT_SECTION 1 - -/* The native Solaris assembler cannot handle the SYMBOL-. syntax, but - requires SYMBOL@rel/@rel64 instead. */ -#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \ - do { \ - fputs (integer_asm_op (SIZE, FALSE), FILE); \ - assemble_name (FILE, LABEL); \ - fputs (SIZE == 8 ? "@rel64" : "@rel", FILE); \ - } while (0) -#endif - -/* As in sol2.h, override the default from i386/x86-64.h to work around - Sun as TLS bug. */ -#undef ASM_OUTPUT_ALIGNED_COMMON -#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ - do \ - { \ - if (TARGET_SUN_TLS \ - && in_section \ - && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \ - switch_to_section (bss_section); \ - x86_elf_aligned_common (FILE, NAME, SIZE, ALIGN); \ - } \ - while (0) - -#define USE_IX86_FRAME_POINTER 1 -#define USE_X86_64_FRAME_POINTER 1 - -#undef NO_PROFILE_COUNTERS - -#undef MCOUNT_NAME -#define MCOUNT_NAME "_mcount" diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h index 6676941f67a..9c3a6f49662 100644 --- a/gcc/config/i386/sol2.h +++ b/gcc/config/i386/sol2.h @@ -18,12 +18,8 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* Augment i386/unix.h version to return 8-byte vectors in memory, matching - Sun Studio compilers until version 12, the only ones supported on - Solaris 9. 
*/ -#undef TARGET_SUBTARGET_DEFAULT -#define TARGET_SUBTARGET_DEFAULT \ - (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_VECT8_RETURNS) +#define SUBTARGET_OPTIMIZATION_OPTIONS \ + { OPT_LEVELS_1_PLUS, OPT_momit_leaf_frame_pointer, NULL, 1 } /* Old versions of the Solaris assembler can not handle the difference of labels in different sections, so force DW_EH_PE_datarel if so. */ @@ -50,27 +46,46 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_SUN_TLS #define TARGET_SUN_TLS 1 -#undef SIZE_TYPE -#define SIZE_TYPE "unsigned int" - -#undef PTRDIFF_TYPE -#define PTRDIFF_TYPE "int" - /* Solaris 2/Intel as chokes on #line directives before Solaris 10. */ #undef CPP_SPEC #define CPP_SPEC "%{,assembler-with-cpp:-P} %(cpp_subtarget)" -#define ASM_CPU_DEFAULT_SPEC "" +/* GNU as understands --32 and --64, but the native Solaris + assembler requires -xarch=generic or -xarch=generic64 instead. */ +#ifdef USE_GAS +#define ASM_CPU32_DEFAULT_SPEC "--32" +#define ASM_CPU64_DEFAULT_SPEC "--64" +#else +#define ASM_CPU32_DEFAULT_SPEC "-xarch=generic" +#define ASM_CPU64_DEFAULT_SPEC "-xarch=generic64" +#endif + +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC "%(asm_cpu_default)" -#define ASM_CPU_SPEC "" - -/* Don't include ASM_PIC_SPEC. While the Solaris 9 assembler accepts - -K PIC, it gives many warnings: - R_386_32 relocation is used for symbol "<symbol>" +/* Don't include ASM_PIC_SPEC. While the Solaris 10+ assembler accepts -K PIC, + it gives many warnings: + Absolute relocation is used for symbol "<symbol>" GNU as doesn't recognize -K at all. */ #undef ASM_SPEC #define ASM_SPEC ASM_SPEC_BASE +#define DEFAULT_ARCH32_P !TARGET_64BIT_DEFAULT + +#define ARCH64_SUBDIR "amd64" + +#ifdef USE_GLD +/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly + follow the Solaris 2 ABI. Prefer them if present. */ +#ifdef HAVE_LD_SOL2_EMULATION +#define ARCH32_EMULATION "elf_i386_sol2" +#define ARCH64_EMULATION "elf_x86_64_sol2" +#else +#define ARCH32_EMULATION "elf_i386" +#define ARCH64_EMULATION "elf_x86_64" +#endif +#endif + #undef ENDFILE_SPEC #define ENDFILE_SPEC \ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ @@ -84,23 +99,39 @@ along with GCC; see the file COPYING3. If not see { "asm_cpu", ASM_CPU_SPEC }, \ { "asm_cpu_default", ASM_CPU_DEFAULT_SPEC }, \ -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ - { "startfile_arch", STARTFILE_ARCH_SPEC }, \ - { "link_arch", LINK_ARCH_SPEC }, \ - SUBTARGET_CPU_EXTRA_SPECS - /* Register the Solaris-specific #pragma directives. */ #define REGISTER_SUBTARGET_PRAGMAS() solaris_register_pragmas () #undef LOCAL_LABEL_PREFIX #define LOCAL_LABEL_PREFIX "." +/* The Solaris 10 FCS as doesn't accept "#" comments, while later versions + do. */ +#undef ASM_COMMENT_START +#define ASM_COMMENT_START "/" + /* The 32-bit Solaris assembler does not support .quad. Do not use it. */ #ifndef HAVE_AS_IX86_QUAD #undef ASM_QUAD #endif +/* The native Solaris assembler can't calculate the difference between + symbols in different sections, which causes problems for -fPIC jump + tables in .rodata. */ +#ifndef HAVE_AS_IX86_DIFF_SECT_DELTA +#undef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION 1 + +/* The native Solaris assembler cannot handle the SYMBOL-. syntax, but + requires SYMBOL@rel/@rel64 instead. */ +#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \ + do { \ + fputs (integer_asm_op (SIZE, FALSE), FILE); \ + assemble_name (FILE, LABEL); \ + fputs (SIZE == 8 ? 
"@rel64" : "@rel", FILE); \ + } while (0) +#endif + /* The Solaris assembler wants a .local for non-exported aliases. */ #define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ do { \ @@ -148,6 +179,20 @@ along with GCC; see the file COPYING3. If not see while (0) #endif /* !USE_GAS */ +/* As in sparc/sol2.h, override the default from i386/x86-64.h to work + around Sun as TLS bug. */ +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + if (TARGET_SUN_TLS \ + && in_section \ + && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \ + switch_to_section (bss_section); \ + x86_elf_aligned_common (FILE, NAME, SIZE, ALIGN); \ + } \ + while (0) + /* Output a simple call for .init/.fini. */ #define ASM_OUTPUT_CALL(FILE, FN) \ do \ @@ -174,6 +219,14 @@ along with GCC; see the file COPYING3. If not see #define DTORS_SECTION_ASM_OP "\t.section\t.dtors, \"aw\"" #endif +#define USE_IX86_FRAME_POINTER 1 +#define USE_X86_64_FRAME_POINTER 1 + +#undef NO_PROFILE_COUNTERS + +#undef MCOUNT_NAME +#define MCOUNT_NAME "_mcount" + /* We do not need NT_VERSION notes. */ #undef X86_FILE_START_VERSION_DIRECTIVE #define X86_FILE_START_VERSION_DIRECTIVE false diff --git a/gcc/config/i386/t-sol2-64 b/gcc/config/i386/t-sol2 index 4e70f0bed27..4e70f0bed27 100644 --- a/gcc/config/i386/t-sol2-64 +++ b/gcc/config/i386/t-sol2 diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c index 57cfb20ee16..837c22b2d77 100644 --- a/gcc/config/m32c/m32c.c +++ b/gcc/config/m32c/m32c.c @@ -3159,7 +3159,7 @@ m32c_illegal_subreg_p (rtx op) { int offset; unsigned int i; - int src_mode, dest_mode; + enum machine_mode src_mode, dest_mode; if (GET_CODE (op) == MEM && ! m32c_legitimate_address_p (Pmode, XEXP (op, 0), false)) diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c index 83bc3a7bf3a..2b84b0ff1eb 100644 --- a/gcc/config/m32r/m32r.c +++ b/gcc/config/m32r/m32r.c @@ -282,31 +282,33 @@ init_reg_tables (void) for (i = 0; i < NUM_MACHINE_MODES; i++) { - switch (GET_MODE_CLASS (i)) + enum machine_mode m = (enum machine_mode) i; + + switch (GET_MODE_CLASS (m)) { case MODE_INT: case MODE_PARTIAL_INT: case MODE_COMPLEX_INT: - if (GET_MODE_SIZE (i) <= 4) + if (GET_MODE_SIZE (m) <= 4) m32r_mode_class[i] = 1 << (int) S_MODE; - else if (GET_MODE_SIZE (i) == 8) + else if (GET_MODE_SIZE (m) == 8) m32r_mode_class[i] = 1 << (int) D_MODE; - else if (GET_MODE_SIZE (i) == 16) + else if (GET_MODE_SIZE (m) == 16) m32r_mode_class[i] = 1 << (int) T_MODE; - else if (GET_MODE_SIZE (i) == 32) + else if (GET_MODE_SIZE (m) == 32) m32r_mode_class[i] = 1 << (int) O_MODE; else m32r_mode_class[i] = 0; break; case MODE_FLOAT: case MODE_COMPLEX_FLOAT: - if (GET_MODE_SIZE (i) <= 4) + if (GET_MODE_SIZE (m) <= 4) m32r_mode_class[i] = 1 << (int) SF_MODE; - else if (GET_MODE_SIZE (i) == 8) + else if (GET_MODE_SIZE (m) == 8) m32r_mode_class[i] = 1 << (int) DF_MODE; - else if (GET_MODE_SIZE (i) == 16) + else if (GET_MODE_SIZE (m) == 16) m32r_mode_class[i] = 1 << (int) TF_MODE; - else if (GET_MODE_SIZE (i) == 32) + else if (GET_MODE_SIZE (m) == 32) m32r_mode_class[i] = 1 << (int) OF_MODE; else m32r_mode_class[i] = 0; diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c index a637e27d41b..1ec96526efd 100644 --- a/gcc/config/msp430/msp430.c +++ b/gcc/config/msp430/msp430.c @@ -2162,7 +2162,7 @@ msp430_print_operand (FILE * file, rtx op, int letter) because builtins are expanded before the frame layout is determined. 
*/ fprintf (file, "%d", msp430_initial_elimination_offset (ARG_POINTER_REGNUM, STACK_POINTER_REGNUM) - - 2); + - (TARGET_LARGE ? 4 : 2)); return; case 'J': diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md index 5d930c37901..74a98b48019 100644 --- a/gcc/config/msp430/msp430.md +++ b/gcc/config/msp430/msp430.md @@ -1321,7 +1321,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%0")) (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))] - "optimize > 2 && msp430_hwmult_type != NONE && ! msp430_is_interrupt_func ()" + "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) return \"PUSH.W sr { DINT { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; @@ -1365,6 +1365,6 @@ if (msp430_use_f5_series_hwmult ()) return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0141 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; " ) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 1616b888c9c..47050c3d03e 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -629,14 +629,14 @@ (match_test "offsettable_nonstrict_memref_p (op)"))) ;; Return 1 if the operand is suitable for load/store quad memory. -;; This predicate only checks for non-atomic loads/stores. +;; This predicate only checks for non-atomic loads/stores (not lqarx/stqcx). (define_predicate "quad_memory_operand" (match_code "mem") { rtx addr, op0, op1; int ret; - if (!TARGET_QUAD_MEMORY) + if (!TARGET_QUAD_MEMORY && !TARGET_SYNC_TI) ret = 0; else if (!memory_operand (op, mode)) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 937eabf3727..f6da9b3a382 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -134,6 +134,7 @@ UNSPEC_DIVEUO UNSPEC_UNPACK_128BIT UNSPEC_PACK_128BIT + UNSPEC_LSQ ]) ;; diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md index 7db439074cd..63152ed04d2 100644 --- a/gcc/config/rs6000/sync.md +++ b/gcc/config/rs6000/sync.md @@ -107,10 +107,17 @@ "isync" [(set_attr "type" "isync")]) +;; Types that we should provide atomic instructions for. +(define_mode_iterator AINT [QI + HI + SI + (DI "TARGET_POWERPC64") + (TI "TARGET_SYNC_TI")]) + ;; The control dependency used for load dependency described ;; in B.2.3 of the Power ISA 2.06B. 
(define_insn "loadsync_<mode>" - [(unspec_volatile:BLK [(match_operand:INT1 0 "register_operand" "r")] + [(unspec_volatile:BLK [(match_operand:AINT 0 "register_operand" "r")] UNSPECV_ISYNC) (clobber (match_scratch:CC 1 "=y"))] "" @@ -118,18 +125,56 @@ [(set_attr "type" "isync") (set_attr "length" "12")]) +(define_insn "load_quadpti" + [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r") + (unspec:PTI + [(match_operand:TI 1 "quad_memory_operand" "wQ")] UNSPEC_LSQ))] + "TARGET_SYNC_TI + && !reg_mentioned_p (operands[0], operands[1])" + "lq %0,%1" + [(set_attr "type" "load") + (set_attr "length" "4")]) + (define_expand "atomic_load<mode>" - [(set (match_operand:INT1 0 "register_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "")) ;; memory + [(set (match_operand:AINT 0 "register_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "")) ;; memory (use (match_operand:SI 2 "const_int_operand" ""))] ;; model "" { + if (<MODE>mode == TImode && !TARGET_SYNC_TI) + FAIL; + enum memmodel model = (enum memmodel) INTVAL (operands[2]); if (model == MEMMODEL_SEQ_CST) emit_insn (gen_hwsync ()); - emit_move_insn (operands[0], operands[1]); + if (<MODE>mode != TImode) + emit_move_insn (operands[0], operands[1]); + else + { + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx pti_reg = gen_reg_rtx (PTImode); + + // Can't have indexed address for 'lq' + if (indexed_address (XEXP (op1, 0), TImode)) + { + rtx old_addr = XEXP (op1, 0); + rtx new_addr = force_reg (Pmode, old_addr); + operands[1] = op1 = replace_equiv_address (op1, new_addr); + } + + emit_insn (gen_load_quadpti (pti_reg, op1)); + + if (WORDS_BIG_ENDIAN) + emit_move_insn (op0, gen_lowpart (TImode, pti_reg)); + else + { + emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti_reg)); + emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti_reg)); + } + } switch (model) { @@ -146,12 +191,24 @@ DONE; }) +(define_insn "store_quadpti" + [(set (match_operand:PTI 0 "quad_memory_operand" "=wQ") + (unspec:PTI + [(match_operand:PTI 1 "quad_int_reg_operand" "r")] UNSPEC_LSQ))] + "TARGET_SYNC_TI" + "stq %1,%0" + [(set_attr "type" "store") + (set_attr "length" "4")]) + (define_expand "atomic_store<mode>" - [(set (match_operand:INT1 0 "memory_operand" "") ;; memory - (match_operand:INT1 1 "register_operand" "")) ;; input + [(set (match_operand:AINT 0 "memory_operand" "") ;; memory + (match_operand:AINT 1 "register_operand" "")) ;; input (use (match_operand:SI 2 "const_int_operand" ""))] ;; model "" { + if (<MODE>mode == TImode && !TARGET_SYNC_TI) + FAIL; + enum memmodel model = (enum memmodel) INTVAL (operands[2]); switch (model) { @@ -166,7 +223,33 @@ default: gcc_unreachable (); } - emit_move_insn (operands[0], operands[1]); + if (<MODE>mode != TImode) + emit_move_insn (operands[0], operands[1]); + else + { + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx pti_reg = gen_reg_rtx (PTImode); + + // Can't have indexed address for 'stq' + if (indexed_address (XEXP (op0, 0), TImode)) + { + rtx old_addr = XEXP (op0, 0); + rtx new_addr = force_reg (Pmode, old_addr); + operands[0] = op0 = replace_equiv_address (op0, new_addr); + } + + if (WORDS_BIG_ENDIAN) + emit_move_insn (pti_reg, gen_lowpart (PTImode, op1)); + else + { + emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op1)); + emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op1)); + } + + emit_insn (gen_store_quadpti (gen_lowpart (PTImode, op0), pti_reg)); + } + DONE; }) @@ -180,14 +263,6 @@ SI (DI 
"TARGET_POWERPC64")]) -;; Types that we should provide atomic instructions for. - -(define_mode_iterator AINT [QI - HI - SI - (DI "TARGET_POWERPC64") - (TI "TARGET_SYNC_TI")]) - (define_insn "load_locked<mode>" [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r") (unspec_volatile:ATOMIC diff --git a/gcc/config/sol2-10.h b/gcc/config/sol2-10.h deleted file mode 100644 index 4488a40cba6..00000000000 --- a/gcc/config/sol2-10.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Operating system specific defines to be used when targeting GCC for any - Solaris 2 system starting from Solaris 10. - Copyright (C) 2006-2014 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -/* Solaris 10 has the float and long double forms of math functions. - We redefine this hook so the version from elfos.h header won't be used. */ -#undef TARGET_LIBC_HAS_FUNCTION -#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function diff --git a/gcc/config/sol2-bi.h b/gcc/config/sol2-bi.h deleted file mode 100644 index fdb2a28178c..00000000000 --- a/gcc/config/sol2-bi.h +++ /dev/null @@ -1,135 +0,0 @@ -/* Definitions of target machine for GCC, for bi-arch Solaris 2. - Copyright (C) 2011-2014 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 3, or (at your option) any later - version. - - GCC is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -/* wchar_t is called differently in <wchar.h> for 32 and 64-bit - compilations. This is called for by SCD 2.4.1, p. 6-83, Figure 6-65 - (32-bit) and p. 6P-10, Figure 6.38 (64-bit). */ - -#undef WCHAR_TYPE -#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int") - -#undef WCHAR_TYPE_SIZE -#define WCHAR_TYPE_SIZE 32 - -/* Same for wint_t. See SCD 2.4.1, p. 6-83, Figure 6-66 (32-bit). There's - no corresponding 64-bit definition, but this is what Solaris 8 - <iso/wchar_iso.h> uses. */ - -#undef WINT_TYPE -#define WINT_TYPE (TARGET_64BIT ? 
"int" : "long int") - -#undef WINT_TYPE_SIZE -#define WINT_TYPE_SIZE 32 - -#if DEFAULT_ARCH32_P -#define MULTILIB_DEFAULTS { "m32" } -#else -#define MULTILIB_DEFAULTS { "m64" } -#endif - -#if DEFAULT_ARCH32_P -#define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}" -#define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}" -#else -#define DEF_ARCH32_SPEC(__str) "%{m32:" __str "}" -#define DEF_ARCH64_SPEC(__str) "%{!m32:" __str "}" -#endif - -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC \ -(DEFAULT_ARCH32_P ? "\ -%{m64:" ASM_CPU64_DEFAULT_SPEC "} \ -%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \ -" : "\ -%{m32:" ASM_CPU32_DEFAULT_SPEC "} \ -%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \ -") - -/* This should be the same as LINK_ARCH32_SPEC_BASE, except with - ARCH64_SUBDIR appended to the paths and /usr/ccs/lib is no longer - necessary. */ -#undef LINK_ARCH64_SPEC_BASE -#define LINK_ARCH64_SPEC_BASE \ - "%{G:-G} \ - %{YP,*} \ - %{R*} \ - %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/" ARCH64_SUBDIR ":%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "} \ - %{!p:%{!pg:-Y P,%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "}}}" - -#undef LINK_ARCH64_SPEC -#ifndef USE_GLD -/* FIXME: Used to be SPARC-only. Not SPARC-specfic but for the model name! */ -#define LINK_ARCH64_SPEC \ - "%{mcmodel=medlow:-M /usr/lib/ld/" ARCH64_SUBDIR "/map.below4G} " \ - LINK_ARCH64_SPEC_BASE -#else -#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE -#endif - -#ifdef USE_GLD -#if DEFAULT_ARCH32_P -#define ARCH_DEFAULT_EMULATION ARCH32_EMULATION -#else -#define ARCH_DEFAULT_EMULATION ARCH64_EMULATION -#endif -#define TARGET_LD_EMULATION "%{m32:-m " ARCH32_EMULATION "}" \ - "%{m64:-m " ARCH64_EMULATION "}" \ - "%{!m32:%{!m64:-m " ARCH_DEFAULT_EMULATION "}} " -#else -#define TARGET_LD_EMULATION "" -#endif - -#undef LINK_ARCH_SPEC -#if DISABLE_MULTILIB -#if DEFAULT_ARCH32_P -#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ -%{m32:%(link_arch32)} \ -%{m64:%edoes not support multilib} \ -%{!m32:%{!m64:%(link_arch_default)}} \ -" -#else -#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ -%{m32:%edoes not support multilib} \ -%{m64:%(link_arch64)} \ -%{!m32:%{!m64:%(link_arch_default)}} \ -" -#endif -#else -#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ -%{m32:%(link_arch32)} \ -%{m64:%(link_arch64)} \ -%{!m32:%{!m64:%(link_arch_default)}}" -#endif - -#define LINK_ARCH_DEFAULT_SPEC \ -(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC) - -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ - { "startfile_arch", STARTFILE_ARCH_SPEC }, \ - { "link_arch32", LINK_ARCH32_SPEC }, \ - { "link_arch64", LINK_ARCH64_SPEC }, \ - { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ - { "link_arch", LINK_ARCH_SPEC }, \ - SUBTARGET_CPU_EXTRA_SPECS diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h index a21c953b035..d88de37bd2e 100644 --- a/gcc/config/sol2.h +++ b/gcc/config/sol2.h @@ -21,20 +21,25 @@ along with GCC; see the file COPYING3. If not see /* We are compiling for Solaris 2 now. */ #define TARGET_SOLARIS 1 -/* Solaris 2 (at least as of 2.5.1) uses a 32-bit wchar_t. */ +/* wchar_t is called differently in <wchar.h> for 32 and 64-bit + compilations. This is called for by SCD 2.4.1, p. 6-83, Figure 6-65 + (32-bit) and p. 6P-10, Figure 6.38 (64-bit). */ + #undef WCHAR_TYPE -#define WCHAR_TYPE "long int" +#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int") #undef WCHAR_TYPE_SIZE -#define WCHAR_TYPE_SIZE BITS_PER_WORD +#define WCHAR_TYPE_SIZE 32 + +/* Same for wint_t. See SCD 2.4.1, p. 6-83, Figure 6-66 (32-bit). 
There's
+   no corresponding 64-bit definition, but this is what Solaris 8
+   <iso/wchar_iso.h> uses.  */
 
-#undef WINT_TYPE
-#define WINT_TYPE "long int"
+#undef WINT_TYPE
+#define WINT_TYPE (TARGET_64BIT ? "int" : "long int")
 
-#undef WINT_TYPE_SIZE
-#define WINT_TYPE_SIZE BITS_PER_WORD
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE 32
 
 #define SIG_ATOMIC_TYPE "int"
@@ -103,6 +108,19 @@ along with GCC; see the file COPYING3.  If not see
       solaris_override_options ();			\
     } while (0)
 
+#if DEFAULT_ARCH32_P
+#define MULTILIB_DEFAULTS { "m32" }
+#else
+#define MULTILIB_DEFAULTS { "m64" }
+#endif
+
+#if DEFAULT_ARCH32_P
+#define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}"
+#else
+#define DEF_ARCH32_SPEC(__str) "%{m32:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{!m32:" __str "}"
+#endif
 
 /* It's safe to pass -s always, even if -g is not used.  Those options are
    handled by both Sun as and GNU as.  */
@@ -111,6 +129,16 @@ along with GCC; see the file COPYING3.  If not see
 
 #define ASM_PIC_SPEC " %{fpic|fpie|fPIC|fPIE:-K PIC}"
 
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" ASM_CPU64_DEFAULT_SPEC "} \
+%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" ASM_CPU32_DEFAULT_SPEC "} \
+%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \
+")
+
 #undef LIB_SPEC
 #define LIB_SPEC \
   "%{!symbolic:\
@@ -120,17 +148,11 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef CROSS_DIRECTORY_STRUCTURE
 #undef MD_EXEC_PREFIX
 #define MD_EXEC_PREFIX "/usr/ccs/bin/"
-
-#undef MD_STARTFILE_PREFIX
-#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
 #endif
 
-#undef STARTFILE_ARCH32_SPEC
-#define STARTFILE_ARCH32_SPEC "%{ansi:values-Xc.o%s} \
-			       %{!ansi:values-Xa.o%s}"
-
 #undef STARTFILE_ARCH_SPEC
-#define STARTFILE_ARCH_SPEC STARTFILE_ARCH32_SPEC
+#define STARTFILE_ARCH_SPEC "%{ansi:values-Xc.o%s} \
+			     %{!ansi:values-Xa.o%s}"
 
 /* We don't use the standard svr4 STARTFILE_SPEC because it's wrong for us.  */
 #undef STARTFILE_SPEC
@@ -153,14 +175,78 @@ along with GCC; see the file COPYING3.  If not see
   "%{G:-G} \
    %{YP,*} \
    %{R*} \
-   %{!YP,*:%{p|pg:-Y P,%R/usr/ccs/lib/libp:%R/usr/lib/libp:%R/usr/ccs/lib:%R/lib:%R/usr/lib} \
-   %{!p:%{!pg:-Y P,%R/usr/ccs/lib:%R/lib:%R/usr/lib}}}"
+   %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp:%R/lib:%R/usr/lib} \
+   %{!p:%{!pg:-Y P,%R/lib:%R/usr/lib}}}"
 
 #undef LINK_ARCH32_SPEC
 #define LINK_ARCH32_SPEC LINK_ARCH32_SPEC_BASE
 
+/* This should be the same as LINK_ARCH32_SPEC_BASE, except with
+   ARCH64_SUBDIR appended to the paths.  */
+#undef LINK_ARCH64_SPEC_BASE
+#define LINK_ARCH64_SPEC_BASE \
+  "%{G:-G} \
+   %{YP,*} \
+   %{R*} \
+   %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/" ARCH64_SUBDIR ":%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "} \
+   %{!p:%{!pg:-Y P,%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "}}}"
+
+#undef LINK_ARCH64_SPEC
+#ifndef USE_GLD
+/* FIXME: Used to be SPARC-only.  Not SPARC-specific but for the model name!
*/ +#define LINK_ARCH64_SPEC \ + "%{mcmodel=medlow:-M /usr/lib/ld/" ARCH64_SUBDIR "/map.below4G} " \ + LINK_ARCH64_SPEC_BASE +#else +#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE +#endif + +#ifdef USE_GLD +#if DEFAULT_ARCH32_P +#define ARCH_DEFAULT_EMULATION ARCH32_EMULATION +#else +#define ARCH_DEFAULT_EMULATION ARCH64_EMULATION +#endif +#define TARGET_LD_EMULATION "%{m32:-m " ARCH32_EMULATION "}" \ + "%{m64:-m " ARCH64_EMULATION "}" \ + "%{!m32:%{!m64:-m " ARCH_DEFAULT_EMULATION "}} " +#else +#define TARGET_LD_EMULATION "" +#endif + #undef LINK_ARCH_SPEC -#define LINK_ARCH_SPEC LINK_ARCH32_SPEC +#if DISABLE_MULTILIB +#if DEFAULT_ARCH32_P +#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ +%{m32:%(link_arch32)} \ +%{m64:%edoes not support multilib} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#else +#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ +%{m32:%edoes not support multilib} \ +%{m64:%(link_arch64)} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#endif +#else +#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ +%{m32:%(link_arch32)} \ +%{m64:%(link_arch64)} \ +%{!m32:%{!m64:%(link_arch_default)}}" +#endif + +#define LINK_ARCH_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC) + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "startfile_arch", STARTFILE_ARCH_SPEC }, \ + { "link_arch32", LINK_ARCH32_SPEC }, \ + { "link_arch64", LINK_ARCH64_SPEC }, \ + { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ + { "link_arch", LINK_ARCH_SPEC }, \ + SUBTARGET_CPU_EXTRA_SPECS /* C++11 programs need -lrt for nanosleep. */ #define TIME_LIBRARY "rt" @@ -235,7 +321,9 @@ along with GCC; see the file COPYING3. If not see #define TARGET_CXX_DECL_MANGLING_CONTEXT solaris_cxx_decl_mangling_context /* Solaris/x86 as and gas support unquoted section names. */ +#ifndef SECTION_NAME_FORMAT #define SECTION_NAME_FORMAT "%s" +#endif /* This is how to declare the size of a function. For Solaris, we output any .init or .fini entries here. */ @@ -249,23 +337,6 @@ along with GCC; see the file COPYING3. If not see } \ while (0) -/* Solaris as has a bug: a .common directive in .tbss or .tdata section - behaves as .tls_common rather than normal non-TLS .common. */ -#undef ASM_OUTPUT_ALIGNED_COMMON -#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ - do \ - { \ - if (TARGET_SUN_TLS \ - && in_section \ - && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \ - switch_to_section (bss_section); \ - fprintf ((FILE), "%s", COMMON_ASM_OP); \ - assemble_name ((FILE), (NAME)); \ - fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ - (SIZE), (ALIGN) / BITS_PER_UNIT); \ - } \ - while (0) - #ifndef USE_GAS #undef TARGET_ASM_ASSEMBLE_VISIBILITY #define TARGET_ASM_ASSEMBLE_VISIBILITY solaris_assemble_visibility @@ -291,8 +362,10 @@ along with GCC; see the file COPYING3. If not see #define TARGET_POSIX_IO +/* Solaris 10 has the float and long double forms of math functions. + We redefine this hook so the version from elfos.h header won't be used. */ #undef TARGET_LIBC_HAS_FUNCTION -#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function +#define TARGET_LIBC_HAS_FUNCTION default_libc_has_function extern GTY(()) tree solaris_pending_aligns; extern GTY(()) tree solaris_pending_inits; diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h index b50a937b26f..2fcdc85c81f 100644 --- a/gcc/config/sparc/sol2.h +++ b/gcc/config/sparc/sol2.h @@ -109,8 +109,6 @@ along with GCC; see the file COPYING3. 
If not see #define CPP_CPU64_DEFAULT_SPEC "" #undef ASM_CPU32_DEFAULT_SPEC #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus" -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC #endif #if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc @@ -120,8 +118,6 @@ along with GCC; see the file COPYING3. If not see #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusa" #undef ASM_CPU64_DEFAULT_SPEC #define ASM_CPU64_DEFAULT_SPEC "-xarch=v9a" -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC #endif #if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 @@ -131,8 +127,6 @@ along with GCC; see the file COPYING3. If not see #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb" #undef ASM_CPU64_DEFAULT_SPEC #define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b" -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC #endif #if TARGET_CPU_DEFAULT == TARGET_CPU_niagara @@ -142,8 +136,6 @@ along with GCC; see the file COPYING3. If not see #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb" #undef ASM_CPU64_DEFAULT_SPEC #define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b" -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC #endif #if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2 @@ -153,8 +145,6 @@ along with GCC; see the file COPYING3. If not see #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb" #undef ASM_CPU64_DEFAULT_SPEC #define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b" -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC #endif #if TARGET_CPU_DEFAULT == TARGET_CPU_niagara3 @@ -164,8 +154,6 @@ along with GCC; see the file COPYING3. If not see #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus" AS_NIAGARA3_FLAG #undef ASM_CPU64_DEFAULT_SPEC #define ASM_CPU64_DEFAULT_SPEC "-xarch=v9" AS_NIAGARA3_FLAG -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC #endif #if TARGET_CPU_DEFAULT == TARGET_CPU_niagara4 @@ -175,8 +163,6 @@ along with GCC; see the file COPYING3. If not see #define ASM_CPU32_DEFAULT_SPEC AS_SPARC32_FLAG AS_NIAGARA4_FLAG #undef ASM_CPU64_DEFAULT_SPEC #define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG AS_NIAGARA4_FLAG -#undef ASM_CPU_DEFAULT_SPEC -#define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC #endif #undef CPP_CPU_SPEC @@ -361,6 +347,23 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); } \ while (0) +/* Solaris as has a bug: a .common directive in .tbss or .tdata section + behaves as .tls_common rather than normal non-TLS .common. */ +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + if (TARGET_SUN_TLS \ + && in_section \ + && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \ + switch_to_section (bss_section); \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + (SIZE), (ALIGN) / BITS_PER_UNIT); \ + } \ + while (0) + #ifndef USE_GAS /* This is how to output an assembler line that says to advance the location counter to a multiple of 2**LOG bytes using the @@ -376,7 +379,6 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); /* Sun as requires doublequoted section names on SPARC. While GNU as supports that, too, we prefer the standard variant. 
*/
-#undef SECTION_NAME_FORMAT
 #define SECTION_NAME_FORMAT	"\"%s\""
 #endif /* !USE_GAS */
diff --git a/gcc/config/sparc/t-sol2-64 b/gcc/config/sparc/t-sol2
index ec7e4eba6fd..ec7e4eba6fd 100644
--- a/gcc/config/sparc/t-sol2-64
+++ b/gcc/config/sparc/t-sol2
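As an illustrative footnote to the Solaris unification above (not part of the patch): with sol2-bi.h folded into sol2.h, the underlying types of wchar_t and wint_t are now chosen by TARGET_64BIT in one place, long int for -m32 and int for -m64, both 32 bits wide, so code can rely on the width even though the type identity differs between multilibs:

#include <stddef.h>

/* WCHAR_TYPE_SIZE is 32 for both -m32 and -m64 in the unified
   sol2.h, so this holds on either multilib.  */
_Static_assert (sizeof (wchar_t) == 4, "wchar_t is 32 bits on Solaris");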