diff options
Diffstat (limited to 'gcc/config/aarch64/arm_neon.h')
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 435 |
1 files changed, 180 insertions, 255 deletions
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index cb5860206a1..15d1ed96584 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -634,6 +634,12 @@ vadd_f32 (float32x2_t __a, float32x2_t __b) return __a + __b; } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vadd_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a + __b; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vadd_u8 (uint8x8_t __a, uint8x8_t __b) { @@ -1204,6 +1210,12 @@ vdiv_f32 (float32x2_t __a, float32x2_t __b) return __a / __b; } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdiv_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a / __b; +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vdivq_f32 (float32x4_t __a, float32x4_t __b) { @@ -1824,6 +1836,12 @@ vsub_f32 (float32x2_t __a, float32x2_t __b) return __a - __b; } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vsub_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a - __b; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vsub_u8 (uint8x8_t __a, uint8x8_t __b) { @@ -5140,138 +5158,6 @@ vclsq_s32 (int32x4_t a) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vclz_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("clz %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vclz_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("clz %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vclz_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("clz %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vclz_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("clz %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vclz_u16 (uint16x4_t a) -{ - uint16x4_t result; - __asm__ ("clz %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vclz_u32 (uint32x2_t a) -{ - uint32x2_t result; - __asm__ ("clz %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vclzq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("clz %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vclzq_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("clz %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vclzq_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("clz %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vclzq_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("clz %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vclzq_u16 (uint16x8_t a) -{ - uint16x8_t result; - __asm__ ("clz %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vclzq_u32 (uint32x4_t a) -{ - uint32x4_t result; - __asm__ ("clz %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vcnt_p8 (poly8x8_t a) { @@ -5556,7 +5442,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ int64_t a_ = (a); \ - int64_t result; \ + float64_t result; \ __asm__ ("scvtf %d0,%d1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -5568,7 +5454,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ uint64_t a_ = (a); \ - uint64_t result; \ + float64_t result; \ __asm__ ("ucvtf %d0,%d1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -5580,7 +5466,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ float64_t a_ = (a); \ - float64_t result; \ + int64_t result; \ __asm__ ("fcvtzs %d0,%d1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -5592,7 +5478,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ float64_t a_ = (a); \ - float64_t result; \ + uint64_t result; \ __asm__ ("fcvtzu %d0,%d1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -5700,7 +5586,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ int32_t a_ = (a); \ - int32_t result; \ + float32_t result; \ __asm__ ("scvtf %s0,%s1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -5712,7 +5598,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ uint32_t a_ = (a); \ - uint32_t result; \ + float32_t result; \ __asm__ ("ucvtf %s0,%s1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -5724,7 +5610,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ float32_t a_ = (a); \ - float32_t result; \ + int32_t result; \ __asm__ ("fcvtzs %s0,%s1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -5736,7 +5622,7 @@ static float32x2_t vdup_n_f32 (float32_t); __extension__ \ ({ \ float32_t a_ = (a); \ - float32_t result; \ + uint32_t result; \ __asm__ ("fcvtzu %s0,%s1,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ @@ -9785,115 +9671,6 @@ vmvnq_u32 (uint32x4_t a) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vneg_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("fneg %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vneg_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("neg %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vneg_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("neg %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vneg_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("neg %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vnegq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("fneg %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vnegq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("fneg %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vnegq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("neg %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vnegq_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("neg %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vnegq_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("neg %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vnegq_s64 (int64x2_t a) -{ - int64x2_t result; - __asm__ ("neg %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vpadal_s8 (int16x4_t a, int8x8_t b) @@ -15859,7 +15636,7 @@ vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx) "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) + : "+w"(result), "=&w"(tmp1) : "w"(temp), "w"(idx), "w"(r) : /* No clobbers */); return result; @@ -15875,7 +15652,7 @@ vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx) "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) + : "+w"(result), "=&w"(tmp1) : "w"(temp), "w"(idx), "w"(r) : /* No clobbers */); return result; @@ -15891,7 +15668,7 @@ vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx) "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) + : "+w"(result), "=&w"(tmp1) : "w"(temp), "w"(idx), "w"(r) : /* No clobbers */); return result; @@ -15946,7 +15723,7 @@ vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx) "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) + : "+w"(result), "=&w"(tmp1) : "Q"(temp), "w"(idx), "w"(r) : "v16", "v17", "memory"); return result; @@ -15965,7 +15742,7 @@ vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx) "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) + : "+w"(result), "=&w"(tmp1) : "Q"(temp), "w"(idx), "w"(r) : "v16", "v17", "memory"); return result; @@ -15984,7 +15761,7 @@ vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx) "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=w"(tmp1) + : "+w"(result), "=&w"(tmp1) : "Q"(temp), "w"(idx), "w"(r) : "v16", "v17", "memory"); return result; @@ -18025,6 +17802,80 @@ vcltzd_f64 (float64_t __a) return __a < 0.0 ? -1ll : 0ll; } +/* vclz. */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vclz_s8 (int8x8_t __a) +{ + return __builtin_aarch64_clzv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vclz_s16 (int16x4_t __a) +{ + return __builtin_aarch64_clzv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vclz_s32 (int32x2_t __a) +{ + return __builtin_aarch64_clzv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclz_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclz_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclz_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclzq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_clzv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclzq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_clzv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclzq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_clzv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclzq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclzq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclzq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); +} + /* vcvt (double -> float). */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -21241,6 +21092,80 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane) return __a * __aarch64_vgetq_lane_u32 (__b, __lane); } +/* vneg */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vneg_f32 (float32x2_t __a) +{ + return -__a; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vneg_f64 (float64x1_t __a) +{ + return -__a; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vneg_s8 (int8x8_t __a) +{ + return -__a; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vneg_s16 (int16x4_t __a) +{ + return -__a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vneg_s32 (int32x2_t __a) +{ + return -__a; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vneg_s64 (int64x1_t __a) +{ + return -__a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vnegq_f32 (float32x4_t __a) +{ + return -__a; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vnegq_f64 (float64x2_t __a) +{ + return -__a; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vnegq_s8 (int8x16_t __a) +{ + return -__a; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vnegq_s16 (int16x8_t __a) +{ + return -__a; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vnegq_s32 (int32x4_t __a) +{ + return -__a; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vnegq_s64 (int64x2_t __a) +{ + return -__a; +} + /* vqabs */ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) |