author     alalaw01 <alalaw01@138bc75d-0d04-0410-961f-82ee72b054a4>   2014-06-03 11:28:55 +0000
committer  alalaw01 <alalaw01@138bc75d-0d04-0410-961f-82ee72b054a4>   2014-06-03 11:28:55 +0000
commit     5f5fccf784ad7a3096d02687435060f8232c9198 (patch)
tree       cea9f6dffba394b50862fffc14e64513e44bfcf8 /gcc
parent     32ac2387960a5b042c1ca3089a4f25b5b49fd33f (diff)
download   gcc-5f5fccf784ad7a3096d02687435060f8232c9198.tar.gz
Recognize shuffle patterns for REV instructions on AArch64, rewrite intrinsics.
* config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev_op><mode>):
New pattern.
* config/aarch64/aarch64.c (aarch64_evpc_rev): New function.
(aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev.
* config/aarch64/iterators.md (REVERSE): New iterator.
(UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements.
(rev_op): New int_attribute.
* config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8,
vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8,
vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8,
vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8,
vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16,
vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8,
vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32):
Replace temporary __asm__ with __builtin_shuffle.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@211174 138bc75d-0d04-0410-961f-82ee72b054a4
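Illustration (not part of the patch): after this change the vrev* intrinsics are plain __builtin_shuffle calls, and the new aarch64_evpc_rev hook lets the vectorizer match any shuffle whose mask reverses elements within 16-, 32- or 64-bit chunks. The snippet below is a minimal sketch of that effect; the function name and the -O2 flag are assumptions, not taken from the commit.

```c
#include <arm_neon.h>

/* Same shape as the rewritten vrev64_s16: with this patch the generic
   shuffle below is expected to be recognized by aarch64_evpc_rev and
   compiled to a single "rev64 v0.4h, v0.4h" on AArch64 (assumed -O2).  */
int16x4_t
reverse_within_doublewords (int16x4_t a)
{
  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
}
```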
Diffstat (limited to 'gcc')
-rw-r--r--   gcc/ChangeLog                        |  18
-rw-r--r--   gcc/config/aarch64/aarch64-simd.md   |   9
-rw-r--r--   gcc/config/aarch64/aarch64.c         |  78
-rw-r--r--   gcc/config/aarch64/arm_neon.h        | 624
-rw-r--r--   gcc/config/aarch64/iterators.md      |   9
5 files changed, 341 insertions, 397 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index dc414dd26e7..1f09b046bb5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2014-06-03  Alan Lawrence  <alan.lawrence@arm.com>
+
+        * config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev_op><mode>):
+        New pattern.
+        * config/aarch64/aarch64.c (aarch64_evpc_rev): New function.
+        (aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev.
+        * config/aarch64/iterators.md (REVERSE): New iterator.
+        (UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements.
+        (rev_op): New int_attribute.
+        * config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8,
+        vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8,
+        vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8,
+        vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8,
+        vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16,
+        vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8,
+        vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32):
+        Replace temporary __asm__ with __builtin_shuffle.
+
 2014-06-03  Andrew Bennett  <andrew.bennett@imgtec.com>
 
         * config/mips/mips-cpus.def: Add mips32r3, mips32r5, mips64r3 and
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c239677a581..a1f0ff53eb3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4196,6 +4196,15 @@
 }
 )
 
+(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+        (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
+                    REVERSE))]
+  "TARGET_SIMD"
+  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
+  [(set_attr "type" "neon_rev<q>")]
+)
+
 (define_insn "aarch64_st2<mode>_dreg"
   [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
         (unspec:TI [(match_operand:OI 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bb33304a2fd..65ef84a8ffa 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9058,6 +9058,80 @@ aarch64_evpc_ext (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Recognize patterns for the REV insns.  */
+
+static bool
+aarch64_evpc_rev (struct expand_vec_perm_d *d)
+{
+  unsigned int i, j, diff, nelt = d->nelt;
+  rtx (*gen) (rtx, rtx);
+
+  if (!d->one_vector_p)
+    return false;
+
+  diff = d->perm[0];
+  switch (diff)
+    {
+    case 7:
+      switch (d->vmode)
+        {
+        case V16QImode: gen = gen_aarch64_rev64v16qi; break;
+        case V8QImode: gen = gen_aarch64_rev64v8qi; break;
+        default:
+          return false;
+        }
+      break;
+    case 3:
+      switch (d->vmode)
+        {
+        case V16QImode: gen = gen_aarch64_rev32v16qi; break;
+        case V8QImode: gen = gen_aarch64_rev32v8qi; break;
+        case V8HImode: gen = gen_aarch64_rev64v8hi; break;
+        case V4HImode: gen = gen_aarch64_rev64v4hi; break;
+        default:
+          return false;
+        }
+      break;
+    case 1:
+      switch (d->vmode)
+        {
+        case V16QImode: gen = gen_aarch64_rev16v16qi; break;
+        case V8QImode: gen = gen_aarch64_rev16v8qi; break;
+        case V8HImode: gen = gen_aarch64_rev32v8hi; break;
+        case V4HImode: gen = gen_aarch64_rev32v4hi; break;
+        case V4SImode: gen = gen_aarch64_rev64v4si; break;
+        case V2SImode: gen = gen_aarch64_rev64v2si; break;
+        case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
+        case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
+        default:
+          return false;
+        }
+      break;
+    default:
+      return false;
+    }
+
+  for (i = 0; i < nelt ; i += diff + 1)
+    for (j = 0; j <= diff; j += 1)
+      {
+        /* This is guaranteed to be true as the value of diff
+           is 7, 3, 1 and we should have enough elements in the
+           queue to generate this.  Getting a vector mask with a
+           value of diff other than these values implies that
+           something is wrong by the time we get here.  */
+        gcc_assert (i + j < nelt);
+        if (d->perm[i + j] != i + diff - j)
+          return false;
+      }
+
+  /* Success! */
+  if (d->testing_p)
+    return true;
+
+  emit_insn (gen (d->target, d->op0));
+  return true;
+}
+
 static bool
 aarch64_evpc_dup (struct expand_vec_perm_d *d)
 {
@@ -9162,7 +9236,9 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 
   if (TARGET_SIMD)
     {
-      if (aarch64_evpc_ext (d))
+      if (aarch64_evpc_rev (d))
+        return true;
+      else if (aarch64_evpc_ext (d))
         return true;
       else if (aarch64_evpc_zip (d))
         return true;
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index c4b5731996c..60b28447002 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10563,402 +10563,6 @@ vrecpeq_u32 (uint32x4_t a)
   return result;
 }
 
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vrev16_p8 (poly8x8_t a)
-{
-  poly8x8_t result;
-  __asm__ ("rev16 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrev16_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("rev16 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrev16_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("rev16 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vrev16q_p8 (poly8x16_t a)
-{
-  poly8x16_t result;
-  __asm__ ("rev16 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrev16q_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("rev16 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrev16q_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("rev16 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vrev32_p8 (poly8x8_t a)
-{
-  poly8x8_t result;
-  __asm__ ("rev32 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vrev32_p16 (poly16x4_t a)
-{
-  poly16x4_t result;
-  __asm__ ("rev32 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrev32_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("rev32 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vrev32_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("rev32 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrev32_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("rev32 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vrev32_u16 (uint16x4_t a)
-{
-  uint16x4_t result;
-  __asm__ ("rev32 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vrev32q_p8 (poly8x16_t a)
-{
-  poly8x16_t result;
-  __asm__ ("rev32 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vrev32q_p16 (poly16x8_t a)
-{
-  poly16x8_t result;
-  __asm__ ("rev32 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrev32q_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("rev32 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vrev32q_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("rev32 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrev32q_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("rev32 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vrev32q_u16 (uint16x8_t a)
-{
-  uint16x8_t result;
-  __asm__ ("rev32 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vrev64_f32 (float32x2_t a)
-{
-  float32x2_t result;
-  __asm__ ("rev64 %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vrev64_p8 (poly8x8_t a)
-{
-  poly8x8_t result;
-  __asm__ ("rev64 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vrev64_p16 (poly16x4_t a)
-{
-  poly16x4_t result;
-  __asm__ ("rev64 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrev64_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("rev64 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vrev64_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("rev64 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vrev64_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("rev64 %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrev64_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("rev64 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vrev64_u16 (uint16x4_t a)
-{
-  uint16x4_t result;
-  __asm__ ("rev64 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vrev64_u32 (uint32x2_t a)
-{
-  uint32x2_t result;
-  __asm__ ("rev64 %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vrev64q_f32 (float32x4_t a)
-{
-  float32x4_t result;
-  __asm__ ("rev64 %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vrev64q_p8 (poly8x16_t a)
-{
-  poly8x16_t result;
-  __asm__ ("rev64 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vrev64q_p16 (poly16x8_t a)
-{
-  poly16x8_t result;
-  __asm__ ("rev64 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrev64q_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("rev64 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vrev64q_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("rev64 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vrev64q_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("rev64 %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrev64q_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("rev64 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vrev64q_u16 (uint16x8_t a)
-{
-  uint16x8_t result;
-  __asm__ ("rev64 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vrev64q_u32 (uint32x4_t a)
-{
-  uint32x4_t result;
-  __asm__ ("rev64 %0.4s,%1.4s"
("rev64 %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vrshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ @@ -21414,6 +21018,234 @@ vrecpxd_f64 (float64_t __a) return __builtin_aarch64_frecpxdf (__a); } + +/* vrev */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev16_u8 (uint8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev32q_u8 (uint8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + 
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrev32q_u16 (uint16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vrev64_f32 (float32x2_t a)
+{
+  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrev64_p8 (poly8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vrev64_p16 (poly16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrev64_s8 (int8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrev64_s16 (int16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrev64_s32 (int32x2_t a)
+{
+  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrev64_u8 (uint8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrev64_u16 (uint16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrev64_u32 (uint32x2_t a)
+{
+  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vrev64q_f32 (float32x4_t a)
+{
+  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrev64q_p8 (poly8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vrev64q_p16 (poly16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrev64q_s8 (int8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrev64q_s16 (int16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vrev64q_s32 (int32x4_t a)
+{
+  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrev64q_u8 (uint8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrev64q_u16 (uint16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrev64q_u32 (uint32x4_t a)
+{
+  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
+}
+
 /* vrnd */
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 05611f4cd61..05c4f7ea543 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -271,6 +271,9 @@
     UNSPEC_TRN1         ; Used in vector permute patterns.
     UNSPEC_TRN2         ; Used in vector permute patterns.
     UNSPEC_EXT          ; Used in aarch64-simd.md.
+    UNSPEC_REV64        ; Used in vector reverse patterns (permute).
+    UNSPEC_REV32        ; Used in vector reverse patterns (permute).
+    UNSPEC_REV16        ; Used in vector reverse patterns (permute).
     UNSPEC_AESE         ; Used in aarch64-simd.md.
     UNSPEC_AESD         ; Used in aarch64-simd.md.
     UNSPEC_AESMC        ; Used in aarch64-simd.md.
@@ -896,6 +899,8 @@
                                       UNSPEC_TRN1 UNSPEC_TRN2
                                       UNSPEC_UZP1 UNSPEC_UZP2])
 
+(define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16])
+
 (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
                             UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX
                             UNSPEC_FRINTA])
@@ -1023,6 +1028,10 @@
                          (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
                          (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
 
+; op code for REV instructions (size within which elements are reversed).
+(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32")
+                         (UNSPEC_REV16 "16")])
+
 (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
                             (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
                             (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
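Editor's note: the mask test inside aarch64_evpc_rev is the heart of this patch, so the sketch below restates it as standalone C. It is not GCC code; the helper name and the example masks are illustrative only, chosen to match the vrev masks shown in the arm_neon.h hunk above.

```c
#include <stdbool.h>

/* Standalone sketch mirroring the check in aarch64_evpc_rev: a permutation
   is a REV-style reversal when, within each group of diff + 1 lanes,
   perm[i + j] == i + diff - j.  diff is 7, 3 or 1 for rev64/rev32/rev16
   on byte elements (and correspondingly smaller for wider elements).  */
static bool
is_rev_mask (const unsigned *perm, unsigned nelt, unsigned diff)
{
  for (unsigned i = 0; i < nelt; i += diff + 1)
    for (unsigned j = 0; j <= diff; j++)
      if (perm[i + j] != i + diff - j)
        return false;
  return true;
}

/* Example: the vrev64q_s16 mask { 3, 2, 1, 0, 7, 6, 5, 4 } passes with
   diff == 3 (rev64 on halfwords) but fails with diff == 1.  */
```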