diff options
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/arm/arm_mve.h | 2265 | ||||
-rw-r--r-- | gcc/config/arm/arm_mve_builtins.def | 468 | ||||
-rw-r--r-- | gcc/config/arm/mve.md | 1008 |
3 files changed, 3413 insertions, 328 deletions
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index eb81a02d95b..db5e472e819 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -599,6 +599,149 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define vshlq_n_s32(__a, __imm) __arm_vshlq_n_s32(__a, __imm) #define vrshrq_n_s32(__a, __imm) __arm_vrshrq_n_s32(__a, __imm) #define vqshlq_n_s32(__a, __imm) __arm_vqshlq_n_s32(__a, __imm) +#define vqmovntq_u16(__a, __b) __arm_vqmovntq_u16(__a, __b) +#define vqmovnbq_u16(__a, __b) __arm_vqmovnbq_u16(__a, __b) +#define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b) +#define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b) +#define vmovntq_u16(__a, __b) __arm_vmovntq_u16(__a, __b) +#define vmovnbq_u16(__a, __b) __arm_vmovnbq_u16(__a, __b) +#define vmlaldavq_u16(__a, __b) __arm_vmlaldavq_u16(__a, __b) +#define vqmovuntq_s16(__a, __b) __arm_vqmovuntq_s16(__a, __b) +#define vqmovunbq_s16(__a, __b) __arm_vqmovunbq_s16(__a, __b) +#define vshlltq_n_u8(__a, __imm) __arm_vshlltq_n_u8(__a, __imm) +#define vshllbq_n_u8(__a, __imm) __arm_vshllbq_n_u8(__a, __imm) +#define vorrq_n_u16(__a, __imm) __arm_vorrq_n_u16(__a, __imm) +#define vbicq_n_u16(__a, __imm) __arm_vbicq_n_u16(__a, __imm) +#define vcmpneq_n_f16(__a, __b) __arm_vcmpneq_n_f16(__a, __b) +#define vcmpneq_f16(__a, __b) __arm_vcmpneq_f16(__a, __b) +#define vcmpltq_n_f16(__a, __b) __arm_vcmpltq_n_f16(__a, __b) +#define vcmpltq_f16(__a, __b) __arm_vcmpltq_f16(__a, __b) +#define vcmpleq_n_f16(__a, __b) __arm_vcmpleq_n_f16(__a, __b) +#define vcmpleq_f16(__a, __b) __arm_vcmpleq_f16(__a, __b) +#define vcmpgtq_n_f16(__a, __b) __arm_vcmpgtq_n_f16(__a, __b) +#define vcmpgtq_f16(__a, __b) __arm_vcmpgtq_f16(__a, __b) +#define vcmpgeq_n_f16(__a, __b) __arm_vcmpgeq_n_f16(__a, __b) +#define vcmpgeq_f16(__a, __b) __arm_vcmpgeq_f16(__a, __b) +#define vcmpeqq_n_f16(__a, __b) __arm_vcmpeqq_n_f16(__a, __b) +#define vcmpeqq_f16(__a, __b) __arm_vcmpeqq_f16(__a, __b) +#define vsubq_f16(__a, __b) __arm_vsubq_f16(__a, __b) +#define vqmovntq_s16(__a, __b) __arm_vqmovntq_s16(__a, __b) +#define vqmovnbq_s16(__a, __b) __arm_vqmovnbq_s16(__a, __b) +#define vqdmulltq_s16(__a, __b) __arm_vqdmulltq_s16(__a, __b) +#define vqdmulltq_n_s16(__a, __b) __arm_vqdmulltq_n_s16(__a, __b) +#define vqdmullbq_s16(__a, __b) __arm_vqdmullbq_s16(__a, __b) +#define vqdmullbq_n_s16(__a, __b) __arm_vqdmullbq_n_s16(__a, __b) +#define vorrq_f16(__a, __b) __arm_vorrq_f16(__a, __b) +#define vornq_f16(__a, __b) __arm_vornq_f16(__a, __b) +#define vmulq_n_f16(__a, __b) __arm_vmulq_n_f16(__a, __b) +#define vmulq_f16(__a, __b) __arm_vmulq_f16(__a, __b) +#define vmovntq_s16(__a, __b) __arm_vmovntq_s16(__a, __b) +#define vmovnbq_s16(__a, __b) __arm_vmovnbq_s16(__a, __b) +#define vmlsldavxq_s16(__a, __b) __arm_vmlsldavxq_s16(__a, __b) +#define vmlsldavq_s16(__a, __b) __arm_vmlsldavq_s16(__a, __b) +#define vmlaldavxq_s16(__a, __b) __arm_vmlaldavxq_s16(__a, __b) +#define vmlaldavq_s16(__a, __b) __arm_vmlaldavq_s16(__a, __b) +#define vminnmvq_f16(__a, __b) __arm_vminnmvq_f16(__a, __b) +#define vminnmq_f16(__a, __b) __arm_vminnmq_f16(__a, __b) +#define vminnmavq_f16(__a, __b) __arm_vminnmavq_f16(__a, __b) +#define vminnmaq_f16(__a, __b) __arm_vminnmaq_f16(__a, __b) +#define vmaxnmvq_f16(__a, __b) __arm_vmaxnmvq_f16(__a, __b) +#define vmaxnmq_f16(__a, __b) __arm_vmaxnmq_f16(__a, __b) +#define vmaxnmavq_f16(__a, __b) __arm_vmaxnmavq_f16(__a, __b) +#define vmaxnmaq_f16(__a, __b) __arm_vmaxnmaq_f16(__a, __b) +#define veorq_f16(__a, __b) __arm_veorq_f16(__a, __b) +#define vcmulq_rot90_f16(__a, __b) __arm_vcmulq_rot90_f16(__a, __b) +#define vcmulq_rot270_f16(__a, __b) __arm_vcmulq_rot270_f16(__a, __b) +#define vcmulq_rot180_f16(__a, __b) __arm_vcmulq_rot180_f16(__a, __b) +#define vcmulq_f16(__a, __b) __arm_vcmulq_f16(__a, __b) +#define vcaddq_rot90_f16(__a, __b) __arm_vcaddq_rot90_f16(__a, __b) +#define vcaddq_rot270_f16(__a, __b) __arm_vcaddq_rot270_f16(__a, __b) +#define vbicq_f16(__a, __b) __arm_vbicq_f16(__a, __b) +#define vandq_f16(__a, __b) __arm_vandq_f16(__a, __b) +#define vaddq_n_f16(__a, __b) __arm_vaddq_n_f16(__a, __b) +#define vabdq_f16(__a, __b) __arm_vabdq_f16(__a, __b) +#define vshlltq_n_s8(__a, __imm) __arm_vshlltq_n_s8(__a, __imm) +#define vshllbq_n_s8(__a, __imm) __arm_vshllbq_n_s8(__a, __imm) +#define vorrq_n_s16(__a, __imm) __arm_vorrq_n_s16(__a, __imm) +#define vbicq_n_s16(__a, __imm) __arm_vbicq_n_s16(__a, __imm) +#define vqmovntq_u32(__a, __b) __arm_vqmovntq_u32(__a, __b) +#define vqmovnbq_u32(__a, __b) __arm_vqmovnbq_u32(__a, __b) +#define vmulltq_poly_p16(__a, __b) __arm_vmulltq_poly_p16(__a, __b) +#define vmullbq_poly_p16(__a, __b) __arm_vmullbq_poly_p16(__a, __b) +#define vmovntq_u32(__a, __b) __arm_vmovntq_u32(__a, __b) +#define vmovnbq_u32(__a, __b) __arm_vmovnbq_u32(__a, __b) +#define vmlaldavq_u32(__a, __b) __arm_vmlaldavq_u32(__a, __b) +#define vqmovuntq_s32(__a, __b) __arm_vqmovuntq_s32(__a, __b) +#define vqmovunbq_s32(__a, __b) __arm_vqmovunbq_s32(__a, __b) +#define vshlltq_n_u16(__a, __imm) __arm_vshlltq_n_u16(__a, __imm) +#define vshllbq_n_u16(__a, __imm) __arm_vshllbq_n_u16(__a, __imm) +#define vorrq_n_u32(__a, __imm) __arm_vorrq_n_u32(__a, __imm) +#define vbicq_n_u32(__a, __imm) __arm_vbicq_n_u32(__a, __imm) +#define vcmpneq_n_f32(__a, __b) __arm_vcmpneq_n_f32(__a, __b) +#define vcmpneq_f32(__a, __b) __arm_vcmpneq_f32(__a, __b) +#define vcmpltq_n_f32(__a, __b) __arm_vcmpltq_n_f32(__a, __b) +#define vcmpltq_f32(__a, __b) __arm_vcmpltq_f32(__a, __b) +#define vcmpleq_n_f32(__a, __b) __arm_vcmpleq_n_f32(__a, __b) +#define vcmpleq_f32(__a, __b) __arm_vcmpleq_f32(__a, __b) +#define vcmpgtq_n_f32(__a, __b) __arm_vcmpgtq_n_f32(__a, __b) +#define vcmpgtq_f32(__a, __b) __arm_vcmpgtq_f32(__a, __b) +#define vcmpgeq_n_f32(__a, __b) __arm_vcmpgeq_n_f32(__a, __b) +#define vcmpgeq_f32(__a, __b) __arm_vcmpgeq_f32(__a, __b) +#define vcmpeqq_n_f32(__a, __b) __arm_vcmpeqq_n_f32(__a, __b) +#define vcmpeqq_f32(__a, __b) __arm_vcmpeqq_f32(__a, __b) +#define vsubq_f32(__a, __b) __arm_vsubq_f32(__a, __b) +#define vqmovntq_s32(__a, __b) __arm_vqmovntq_s32(__a, __b) +#define vqmovnbq_s32(__a, __b) __arm_vqmovnbq_s32(__a, __b) +#define vqdmulltq_s32(__a, __b) __arm_vqdmulltq_s32(__a, __b) +#define vqdmulltq_n_s32(__a, __b) __arm_vqdmulltq_n_s32(__a, __b) +#define vqdmullbq_s32(__a, __b) __arm_vqdmullbq_s32(__a, __b) +#define vqdmullbq_n_s32(__a, __b) __arm_vqdmullbq_n_s32(__a, __b) +#define vorrq_f32(__a, __b) __arm_vorrq_f32(__a, __b) +#define vornq_f32(__a, __b) __arm_vornq_f32(__a, __b) +#define vmulq_n_f32(__a, __b) __arm_vmulq_n_f32(__a, __b) +#define vmulq_f32(__a, __b) __arm_vmulq_f32(__a, __b) +#define vmovntq_s32(__a, __b) __arm_vmovntq_s32(__a, __b) +#define vmovnbq_s32(__a, __b) __arm_vmovnbq_s32(__a, __b) +#define vmlsldavxq_s32(__a, __b) __arm_vmlsldavxq_s32(__a, __b) +#define vmlsldavq_s32(__a, __b) __arm_vmlsldavq_s32(__a, __b) +#define vmlaldavxq_s32(__a, __b) __arm_vmlaldavxq_s32(__a, __b) +#define vmlaldavq_s32(__a, __b) __arm_vmlaldavq_s32(__a, __b) +#define vminnmvq_f32(__a, __b) __arm_vminnmvq_f32(__a, __b) +#define vminnmq_f32(__a, __b) __arm_vminnmq_f32(__a, __b) +#define vminnmavq_f32(__a, __b) __arm_vminnmavq_f32(__a, __b) +#define vminnmaq_f32(__a, __b) __arm_vminnmaq_f32(__a, __b) +#define vmaxnmvq_f32(__a, __b) __arm_vmaxnmvq_f32(__a, __b) +#define vmaxnmq_f32(__a, __b) __arm_vmaxnmq_f32(__a, __b) +#define vmaxnmavq_f32(__a, __b) __arm_vmaxnmavq_f32(__a, __b) +#define vmaxnmaq_f32(__a, __b) __arm_vmaxnmaq_f32(__a, __b) +#define veorq_f32(__a, __b) __arm_veorq_f32(__a, __b) +#define vcmulq_rot90_f32(__a, __b) __arm_vcmulq_rot90_f32(__a, __b) +#define vcmulq_rot270_f32(__a, __b) __arm_vcmulq_rot270_f32(__a, __b) +#define vcmulq_rot180_f32(__a, __b) __arm_vcmulq_rot180_f32(__a, __b) +#define vcmulq_f32(__a, __b) __arm_vcmulq_f32(__a, __b) +#define vcaddq_rot90_f32(__a, __b) __arm_vcaddq_rot90_f32(__a, __b) +#define vcaddq_rot270_f32(__a, __b) __arm_vcaddq_rot270_f32(__a, __b) +#define vbicq_f32(__a, __b) __arm_vbicq_f32(__a, __b) +#define vandq_f32(__a, __b) __arm_vandq_f32(__a, __b) +#define vaddq_n_f32(__a, __b) __arm_vaddq_n_f32(__a, __b) +#define vabdq_f32(__a, __b) __arm_vabdq_f32(__a, __b) +#define vshlltq_n_s16(__a, __imm) __arm_vshlltq_n_s16(__a, __imm) +#define vshllbq_n_s16(__a, __imm) __arm_vshllbq_n_s16(__a, __imm) +#define vorrq_n_s32(__a, __imm) __arm_vorrq_n_s32(__a, __imm) +#define vbicq_n_s32(__a, __imm) __arm_vbicq_n_s32(__a, __imm) +#define vrmlaldavhq_u32(__a, __b) __arm_vrmlaldavhq_u32(__a, __b) +#define vctp8q_m(__a, __p) __arm_vctp8q_m(__a, __p) +#define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p) +#define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p) +#define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p) +#define vaddlvaq_u32(__a, __b) __arm_vaddlvaq_u32(__a, __b) +#define vrmlsldavhxq_s32(__a, __b) __arm_vrmlsldavhxq_s32(__a, __b) +#define vrmlsldavhq_s32(__a, __b) __arm_vrmlsldavhq_s32(__a, __b) +#define vrmlaldavhxq_s32(__a, __b) __arm_vrmlaldavhxq_s32(__a, __b) +#define vrmlaldavhq_s32(__a, __b) __arm_vrmlaldavhq_s32(__a, __b) +#define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b) +#define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b) +#define vaddlvaq_s32(__a, __b) __arm_vaddlvaq_s32(__a, __b) #endif __extension__ extern __inline void @@ -3859,6 +4002,489 @@ __arm_vqshlq_n_s32 (int32x4_t __a, const int __imm) return __builtin_mve_vqshlq_n_sv4si (__a, __imm); } +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovntq_u16 (uint8x16_t __a, uint16x8_t __b) +{ + return __builtin_mve_vqmovntq_uv8hi (__a, __b); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovnbq_u16 (uint8x16_t __a, uint16x8_t __b) +{ + return __builtin_mve_vqmovnbq_uv8hi (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulltq_poly_p8 (uint8x16_t __a, uint8x16_t __b) +{ + return __builtin_mve_vmulltq_poly_pv16qi (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmullbq_poly_p8 (uint8x16_t __a, uint8x16_t __b) +{ + return __builtin_mve_vmullbq_poly_pv16qi (__a, __b); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovntq_u16 (uint8x16_t __a, uint16x8_t __b) +{ + return __builtin_mve_vmovntq_uv8hi (__a, __b); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovnbq_u16 (uint8x16_t __a, uint16x8_t __b) +{ + return __builtin_mve_vmovnbq_uv8hi (__a, __b); +} + +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlaldavq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __builtin_mve_vmlaldavq_uv8hi (__a, __b); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovuntq_s16 (uint8x16_t __a, int16x8_t __b) +{ + return __builtin_mve_vqmovuntq_sv8hi (__a, __b); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovunbq_s16 (uint8x16_t __a, int16x8_t __b) +{ + return __builtin_mve_vqmovunbq_sv8hi (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshlltq_n_u8 (uint8x16_t __a, const int __imm) +{ + return __builtin_mve_vshlltq_n_uv16qi (__a, __imm); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshllbq_n_u8 (uint8x16_t __a, const int __imm) +{ + return __builtin_mve_vshllbq_n_uv16qi (__a, __imm); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_n_u16 (uint16x8_t __a, const int __imm) +{ + return __builtin_mve_vorrq_n_uv8hi (__a, __imm); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_n_u16 (uint16x8_t __a, const int __imm) +{ + return __builtin_mve_vbicq_n_uv8hi (__a, __imm); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovntq_s16 (int8x16_t __a, int16x8_t __b) +{ + return __builtin_mve_vqmovntq_sv8hi (__a, __b); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovnbq_s16 (int8x16_t __a, int16x8_t __b) +{ + return __builtin_mve_vqmovnbq_sv8hi (__a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmulltq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_mve_vqdmulltq_sv8hi (__a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmulltq_n_s16 (int16x8_t __a, int16_t __b) +{ + return __builtin_mve_vqdmulltq_n_sv8hi (__a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmullbq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_mve_vqdmullbq_sv8hi (__a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmullbq_n_s16 (int16x8_t __a, int16_t __b) +{ + return __builtin_mve_vqdmullbq_n_sv8hi (__a, __b); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovntq_s16 (int8x16_t __a, int16x8_t __b) +{ + return __builtin_mve_vmovntq_sv8hi (__a, __b); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovnbq_s16 (int8x16_t __a, int16x8_t __b) +{ + return __builtin_mve_vmovnbq_sv8hi (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlsldavxq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_mve_vmlsldavxq_sv8hi (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlsldavq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_mve_vmlsldavq_sv8hi (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlaldavxq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_mve_vmlaldavxq_sv8hi (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlaldavq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_mve_vmlaldavq_sv8hi (__a, __b); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshlltq_n_s8 (int8x16_t __a, const int __imm) +{ + return __builtin_mve_vshlltq_n_sv16qi (__a, __imm); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshllbq_n_s8 (int8x16_t __a, const int __imm) +{ + return __builtin_mve_vshllbq_n_sv16qi (__a, __imm); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_n_s16 (int16x8_t __a, const int __imm) +{ + return __builtin_mve_vorrq_n_sv8hi (__a, __imm); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_n_s16 (int16x8_t __a, const int __imm) +{ + return __builtin_mve_vbicq_n_sv8hi (__a, __imm); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovntq_u32 (uint16x8_t __a, uint32x4_t __b) +{ + return __builtin_mve_vqmovntq_uv4si (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovnbq_u32 (uint16x8_t __a, uint32x4_t __b) +{ + return __builtin_mve_vqmovnbq_uv4si (__a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulltq_poly_p16 (uint16x8_t __a, uint16x8_t __b) +{ + return __builtin_mve_vmulltq_poly_pv8hi (__a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmullbq_poly_p16 (uint16x8_t __a, uint16x8_t __b) +{ + return __builtin_mve_vmullbq_poly_pv8hi (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovntq_u32 (uint16x8_t __a, uint32x4_t __b) +{ + return __builtin_mve_vmovntq_uv4si (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovnbq_u32 (uint16x8_t __a, uint32x4_t __b) +{ + return __builtin_mve_vmovnbq_uv4si (__a, __b); +} + +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlaldavq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __builtin_mve_vmlaldavq_uv4si (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovuntq_s32 (uint16x8_t __a, int32x4_t __b) +{ + return __builtin_mve_vqmovuntq_sv4si (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovunbq_s32 (uint16x8_t __a, int32x4_t __b) +{ + return __builtin_mve_vqmovunbq_sv4si (__a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshlltq_n_u16 (uint16x8_t __a, const int __imm) +{ + return __builtin_mve_vshlltq_n_uv8hi (__a, __imm); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshllbq_n_u16 (uint16x8_t __a, const int __imm) +{ + return __builtin_mve_vshllbq_n_uv8hi (__a, __imm); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_n_u32 (uint32x4_t __a, const int __imm) +{ + return __builtin_mve_vorrq_n_uv4si (__a, __imm); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_n_u32 (uint32x4_t __a, const int __imm) +{ + return __builtin_mve_vbicq_n_uv4si (__a, __imm); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovntq_s32 (int16x8_t __a, int32x4_t __b) +{ + return __builtin_mve_vqmovntq_sv4si (__a, __b); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqmovnbq_s32 (int16x8_t __a, int32x4_t __b) +{ + return __builtin_mve_vqmovnbq_sv4si (__a, __b); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmulltq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vqdmulltq_sv4si (__a, __b); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmulltq_n_s32 (int32x4_t __a, int32_t __b) +{ + return __builtin_mve_vqdmulltq_n_sv4si (__a, __b); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmullbq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vqdmullbq_sv4si (__a, __b); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmullbq_n_s32 (int32x4_t __a, int32_t __b) +{ + return __builtin_mve_vqdmullbq_n_sv4si (__a, __b); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovntq_s32 (int16x8_t __a, int32x4_t __b) +{ + return __builtin_mve_vmovntq_sv4si (__a, __b); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmovnbq_s32 (int16x8_t __a, int32x4_t __b) +{ + return __builtin_mve_vmovnbq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlsldavxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vmlsldavxq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlsldavq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vmlsldavq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlaldavxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vmlaldavxq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmlaldavq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vmlaldavq_sv4si (__a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshlltq_n_s16 (int16x8_t __a, const int __imm) +{ + return __builtin_mve_vshlltq_n_sv8hi (__a, __imm); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vshllbq_n_s16 (int16x8_t __a, const int __imm) +{ + return __builtin_mve_vshllbq_n_sv8hi (__a, __imm); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_n_s32 (int32x4_t __a, const int __imm) +{ + return __builtin_mve_vorrq_n_sv4si (__a, __imm); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_n_s32 (int32x4_t __a, const int __imm) +{ + return __builtin_mve_vbicq_n_sv4si (__a, __imm); +} + +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vrmlaldavhq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __builtin_mve_vrmlaldavhq_uv4si (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vctp8q_m (uint32_t __a, mve_pred16_t __p) +{ + return __builtin_mve_vctp8q_mhi (__a, __p); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vctp64q_m (uint32_t __a, mve_pred16_t __p) +{ + return __builtin_mve_vctp64q_mhi (__a, __p); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vctp32q_m (uint32_t __a, mve_pred16_t __p) +{ + return __builtin_mve_vctp32q_mhi (__a, __p); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vctp16q_m (uint32_t __a, mve_pred16_t __p) +{ + return __builtin_mve_vctp16q_mhi (__a, __p); +} + +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddlvaq_u32 (uint64_t __a, uint32x4_t __b) +{ + return __builtin_mve_vaddlvaq_uv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vrmlsldavhxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vrmlsldavhxq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vrmlsldavhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vrmlsldavhq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vrmlaldavhxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vrmlaldavhxq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vrmlaldavhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_mve_vrmlaldavhq_sv4si (__a, __b); +} + +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddlvaq_s32 (int64_t __a, int32x4_t __b) +{ + return __builtin_mve_vaddlvaq_sv4si (__a, __b); +} + #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void @@ -4145,8 +4771,8 @@ __arm_vcvtaq_u32_f32 (float32x4_t __a) return __builtin_mve_vcvtaq_uv4si (__a); } - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtaq_s16_f16 (float16x8_t __a) { return __builtin_mve_vcvtaq_sv8hi (__a); @@ -4299,6 +4925,524 @@ __arm_vcvtq_n_u32_f32 (float32x4_t __a, const int __imm6) return __builtin_mve_vcvtq_n_from_f_uv4si (__a, __imm6); } +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpneq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vcmpneq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpneq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmpneq_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpltq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vcmpltq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpltq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmpltq_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpleq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vcmpleq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpleq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmpleq_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgtq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vcmpgtq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgtq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmpgtq_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgeq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vcmpgeq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgeq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmpgeq_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpeqq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vcmpeqq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpeqq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmpeqq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vsubq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vsubq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vorrq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vornq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vornq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vmulq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vmulq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmvq_f16 (float16_t __a, float16x8_t __b) +{ + return __builtin_mve_vminnmvq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vminnmq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmavq_f16 (float16_t __a, float16x8_t __b) +{ + return __builtin_mve_vminnmavq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmaq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vminnmaq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmvq_f16 (float16_t __a, float16x8_t __b) +{ + return __builtin_mve_vmaxnmvq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vmaxnmq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmavq_f16 (float16_t __a, float16x8_t __b) +{ + return __builtin_mve_vmaxnmavq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmaq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vmaxnmaq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_veorq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_veorq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmulq_rot90_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmulq_rot270_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmulq_rot180_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcmulq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vbicq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vandq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vandq_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddq_n_f16 (float16x8_t __a, float16_t __b) +{ + return __builtin_mve_vaddq_n_fv8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vabdq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_mve_vabdq_fv8hf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpneq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vcmpneq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpneq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmpneq_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpltq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vcmpltq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmpltq_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpleq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vcmpleq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmpleq_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgtq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vcmpgtq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmpgtq_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgeq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vcmpgeq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpgeq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmpgeq_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpeqq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vcmpeqq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline mve_pred16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmpeqq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmpeqq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vsubq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vsubq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vorrq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vornq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vornq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vmulq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vmulq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmvq_f32 (float32_t __a, float32x4_t __b) +{ + return __builtin_mve_vminnmvq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vminnmq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmavq_f32 (float32_t __a, float32x4_t __b) +{ + return __builtin_mve_vminnmavq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmaq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vminnmaq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmvq_f32 (float32_t __a, float32x4_t __b) +{ + return __builtin_mve_vmaxnmvq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vmaxnmq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmavq_f32 (float32_t __a, float32x4_t __b) +{ + return __builtin_mve_vmaxnmavq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmaq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vmaxnmaq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_veorq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_veorq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot90_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmulq_rot90_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot270_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmulq_rot270_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot180_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmulq_rot180_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcmulq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vbicq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vandq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vandq_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_mve_vaddq_n_fv4sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vabdq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_mve_vabdq_fv4sf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b) +{ + return __builtin_mve_vcvttq_f16_f32v8hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcvtbq_f16_f32 (float16x8_t __a, float32x4_t __b) +{ + return __builtin_mve_vcvtbq_f16_f32v8hf (__a, __b); +} + #endif enum { @@ -4567,6 +5711,12 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t]: __arm_vnegq_f16 (__ARM_mve_coerce(__p0, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t]: __arm_vnegq_f32 (__ARM_mve_coerce(__p0, float32x4_t)));}) +#define vdupq_n(p0) __arm_vdupq_n(p0) +#define __arm_vdupq_n(p0) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_float16x8_t]: __arm_vdupq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vdupq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t)));}) + #define vabsq(p0) __arm_vabsq(p0) #define __arm_vabsq(p0) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ @@ -4677,13 +5827,15 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vsubq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vsubq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));}) - -#define vbrsrq(p0,p1) __arm_vbrsrq(p0,p1) -#define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \ - int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));}) + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsubq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsubq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsubq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsubq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsubq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsubq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vsubq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vsubq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) #define vshlq(p0,p1) __arm_vshlq(p0,p1) #define __arm_vshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ @@ -4714,6 +5866,370 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_n_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_n_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) +#define vorrq(p0,p1) __arm_vorrq(p0,p1) +#define __arm_vorrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vorrq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vorrq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vorrq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vorrq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vorrq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vorrq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vorrq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vorrq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vabdq(p0,p1) __arm_vabdq(p0,p1) +#define __arm_vabdq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vabdq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vabdq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vaddq(p0,p1) __arm_vaddq(p0,p1) +#define __arm_vaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vaddq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + +#define vandq(p0,p1) __arm_vandq(p0,p1) +#define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vandq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vandq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vandq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vandq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vandq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vandq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vandq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vandq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vbicq(p0,p1) __arm_vbicq(p0,p1) +#define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vornq(p0,p1) __arm_vornq(p0,p1) +#define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vmulq_n(p0,p1) __arm_vmulq_n(p0,p1) +#define __arm_vmulq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmulq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmulq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmulq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmulq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmulq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmulq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vmulq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vmulq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + +#define vmulq(p0,p1) __arm_vmulq(p0,p1) +#define __arm_vmulq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmulq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmulq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcaddq_rot270(p0,p1) __arm_vcaddq_rot270(p0,p1) +#define __arm_vcaddq_rot270(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcaddq_rot270_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcaddq_rot270_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcaddq_rot270_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcaddq_rot270_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot270_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot270_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot270_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot270_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmpeqq(p0,p1) __arm_vcmpeqq(p0,p1) +#define __arm_vcmpeqq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vcmpeqq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpeqq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpeqq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vcmpeqq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpeqq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpeqq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpeqq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpeqq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpeqq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcaddq_rot90(p0,p1) __arm_vcaddq_rot90(p0,p1) +#define __arm_vcaddq_rot90(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcaddq_rot90_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcaddq_rot90_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcaddq_rot90_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcaddq_rot90_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot90_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot90_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot90_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot90_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmpgeq_n(p0,p1) __arm_vcmpgeq_n(p0,p1) +#define __arm_vcmpgeq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vcmpgeq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpgeq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpgeq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + +#define vcmpgeq(p0,p1) __arm_vcmpgeq(p0,p1) +#define __arm_vcmpgeq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgeq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgeq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgeq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmpgtq_n(p0,p1) __arm_vcmpgtq_n(p0,p1) +#define __arm_vcmpgtq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vcmpgtq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpgtq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpgtq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + +#define vcmpgtq(p0,p1) __arm_vcmpgtq(p0,p1) +#define __arm_vcmpgtq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgtq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgtq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgtq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmpleq(p0,p1) __arm_vcmpleq(p0,p1) +#define __arm_vcmpleq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpleq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpleq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpleq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpleq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpleq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vcmpleq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpleq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpleq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + +#define vcmpltq(p0,p1) __arm_vcmpltq(p0,p1) +#define __arm_vcmpltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpltq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpltq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpltq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vcmpltq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpltq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpltq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + +#define vcmpneq(p0,p1) __arm_vcmpneq(p0,p1) +#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpneq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpneq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmulq(p0,p1) __arm_vcmulq(p0,p1) +#define __arm_vcmulq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmulq_rot180(p0,p1) __arm_vcmulq_rot180(p0,p1) +#define __arm_vcmulq_rot180(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot180_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot180_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmulq_rot270(p0,p1) __arm_vcmulq_rot270(p0,p1) +#define __arm_vcmulq_rot270(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot270_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot270_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vcmulq_rot90(p0,p1) __arm_vcmulq_rot90(p0,p1) +#define __arm_vcmulq_rot90(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot90_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot90_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define veorq(p0,p1) __arm_veorq(p0,p1) +#define __arm_veorq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_veorq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_veorq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_veorq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_veorq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_veorq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_veorq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_veorq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_veorq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vmaxnmaq(p0,p1) __arm_vmaxnmaq(p0,p1) +#define __arm_vmaxnmaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vmaxnmavq(p0,p1) __arm_vmaxnmavq(p0,p1) +#define __arm_vmaxnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vmaxnmq(p0,p1) __arm_vmaxnmq(p0,p1) +#define __arm_vmaxnmq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vmaxnmvq(p0,p1) __arm_vmaxnmvq(p0,p1) +#define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vmaxnmvq(p0,p1) __arm_vmaxnmvq(p0,p1) +#define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vminnmaq(p0,p1) __arm_vminnmaq(p0,p1) +#define __arm_vminnmaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vminnmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vminnmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vminnmavq(p0,p1) __arm_vminnmavq(p0,p1) +#define __arm_vminnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16_t][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32_t][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vbrsrq(p0,p1) __arm_vbrsrq(p0,p1) +#define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vbrsrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vbrsrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vbrsrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1), \ + int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));}) + +#define vminnmq(p0,p1) __arm_vminnmq(p0,p1) +#define __arm_vminnmq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vminnmq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vminnmq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + +#define vminnmvq(p0,p1) __arm_vminnmvq(p0,p1) +#define __arm_vminnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_float16_t][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_float32_t][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + #define vcmpgeq(p0,p1) __arm_vcmpgeq(p0,p1) #define __arm_vcmpgeq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -4729,6 +6245,422 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) +#define vshlq_r(p0,p1) __arm_vshlq_r(p0,p1) +#define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) + +#define vshlq_n(p0,p1) __arm_vshlq_n(p0,p1) +#define __arm_vshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) + +#define vshlltq(p0,p1) __arm_vshlltq(p0,p1) +#define __arm_vshlltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlltq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlltq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlltq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1));}) + +#define vshllbq(p0,p1) __arm_vshllbq(p0,p1) +#define __arm_vshllbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vshllbq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vshllbq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshllbq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshllbq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1));}) + +#define vrshrq(p0,p1) __arm_vrshrq(p0,p1) +#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) + +#define vrshrq(p0,p1) __arm_vrshrq(p0,p1) +#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) + +#define vrshlq(p0,p1) __arm_vrshlq(p0,p1) +#define __arm_vrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vrmulhq(p0,p1) __arm_vrmulhq(p0,p1) +#define __arm_vrmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrmulhq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrmulhq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmulhq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vrhaddq(p0,p1) __arm_vrhaddq(p0,p1) +#define __arm_vrhaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrhaddq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrhaddq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrhaddq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrhaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrhaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrhaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vqsubq(p0,p1) __arm_vqsubq(p0,p1) +#define __arm_vqsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqsubq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqsubq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqsubq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqsubq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqsubq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqsubq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqsubq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqsubq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqsubq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vqshluq(p0,p1) __arm_vqshluq(p0,p1) +#define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));}) + +#define vqshlq(p0,p1) __arm_vqshlq(p0,p1) +#define __arm_vqshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqshlq_r(p0,p1) __arm_vqshlq_r(p0,p1) +#define __arm_vqshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) + +#define vqshlq_n(p0,p1) __arm_vqshlq_n(p0,p1) +#define __arm_vqshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) + +#define vqrshlq(p0,p1) __arm_vqrshlq(p0,p1) +#define __arm_vqrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) + +#define vqrdmulhq(p0,p1) __arm_vqrdmulhq(p0,p1) +#define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqrdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqrdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqrdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) + +#define vmlaldavxq(p0,p1) __arm_vmlaldavxq(p0,p1) +#define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqmovuntq(p0,p1) __arm_vqmovuntq(p0,p1) +#define __arm_vqmovuntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovuntq_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovuntq_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqmovntq(p0,p1) __arm_vqmovntq(p0,p1) +#define __arm_vqmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovntq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovntq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqmovntq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqmovntq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vqmovnbq(p0,p1) __arm_vqmovnbq(p0,p1) +#define __arm_vqmovnbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovnbq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovnbq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqmovnbq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqmovnbq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vqdmulltq(p0,p1) __arm_vqdmulltq(p0,p1) +#define __arm_vqdmulltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqmovunbq(p0,p1) __arm_vqmovunbq(p0,p1) +#define __arm_vqmovunbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovunbq_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovunbq_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqdmullbq(p0,p1) __arm_vqdmullbq(p0,p1) +#define __arm_vqdmullbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmullbq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmullbq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqdmulhq(p0,p1) __arm_vqdmulhq(p0,p1) +#define __arm_vqdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqaddq(p0,p1) __arm_vqaddq(p0,p1) +#define __arm_vqaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqaddq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqaddq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqaddq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmulltq_poly(p0,p1) __arm_vmulltq_poly(p0,p1) +#define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));}) + +#define vmullbq_poly(p0,p1) __arm_vmullbq_poly(p0,p1) +#define __arm_vmullbq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));}) + +#define vmulltq_int(p0,p1) __arm_vmulltq_int(p0,p1) +#define __arm_vmulltq_int(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vhaddq(p0,p1) __arm_vhaddq(p0,p1) +#define __arm_vhaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhaddq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhaddq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhaddq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vhcaddq_rot270(p0,p1) __arm_vhcaddq_rot270(p0,p1) +#define __arm_vhcaddq_rot270(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot270_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot270_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot270_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vhcaddq_rot90(p0,p1) __arm_vhcaddq_rot90(p0,p1) +#define __arm_vhcaddq_rot90(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot90_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vhsubq(p0,p1) __arm_vhsubq(p0,p1) +#define __arm_vhsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhsubq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhsubq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhsubq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhsubq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhsubq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhsubq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhsubq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhsubq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhsubq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vminq(p0,p1) __arm_vminq(p0,p1) +#define __arm_vminq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vminaq(p0,p1) __arm_vminaq(p0,p1) +#define __arm_vminaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminaq_s8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminaq_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminaq_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vmaxq(p0,p1) __arm_vmaxq(p0,p1) +#define __arm_vmaxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmaxaq(p0,p1) __arm_vmaxaq(p0,p1) +#define __arm_vmaxaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxaq_s8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxaq_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxaq_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vmovntq(p0,p1) __arm_vmovntq(p0,p1) +#define __arm_vmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vmovntq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vmovntq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vmovntq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vmovntq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmovnbq(p0,p1) __arm_vmovnbq(p0,p1) +#define __arm_vmovnbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vmovnbq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vmovnbq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vmovnbq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vmovnbq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmulhq(p0,p1) __arm_vmulhq(p0,p1) +#define __arm_vmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulhq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulhq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulhq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmullbq_int(p0,p1) __arm_vmullbq_int(p0,p1) +#define __arm_vmullbq_int(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + #define vcmpgtq(p0,p1) __arm_vcmpgtq(p0,p1) #define __arm_vcmpgtq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -4744,7 +6676,7 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) -#else /* MVE Interger. */ +#else /* MVE Interger. srinath*/ #define vst4q(p0,p1) __arm_vst4q(p0,p1) #define __arm_vst4q(p0,p1) ({ __typeof(p0) __p0 = (p0); \ @@ -4915,12 +6847,7 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsubq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsubq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsubq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsubq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) - -#define vsubq_n(p0,p1) __arm_vsubq_n(p0,p1) -#define __arm_vsubq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsubq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vsubq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ @@ -4938,43 +6865,16 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) -#define vrshlq_n(p0,p1) __arm_vrshlq_n(p0,p1) -#define __arm_vrshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) - #define vrshlq(p0,p1) __arm_vrshlq(p0,p1) #define __arm_vrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) - -#define vrshlq_n(p0,p1) __arm_vrshlq_n(p0,p1) -#define __arm_vrshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) - -#define vrshlq(p0,p1) __arm_vrshlq(p0,p1) -#define __arm_vrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -5004,8 +6904,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrhaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrhaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) -#define vqsubq_n(p0,p1) __arm_vqsubq_n(p0,p1) -#define __arm_vqsubq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ +#define vqsubq(p0,p1) __arm_vqsubq(p0,p1) +#define __arm_vqsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ @@ -5013,12 +6913,7 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqsubq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqsubq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));}) - -#define vqsubq(p0,p1) __arm_vqsubq(p0,p1) -#define __arm_vqsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqsubq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqsubq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqsubq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -5054,8 +6949,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));}) -#define vrshrq_n(p0,p1) __arm_vrshrq_n(p0,p1) -#define __arm_vrshrq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ +#define vrshrq(p0,p1) __arm_vrshrq(p0,p1) +#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ @@ -5074,13 +6969,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) -#define vqshluq_n(p0,p1) __arm_vqshluq_n(p0,p1) -#define __arm_vqshluq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));}) - #define vqshlq_n(p0,p1) __arm_vqshlq_n(p0,p1) #define __arm_vqshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ @@ -5091,17 +6979,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) -#define vqrshlq_n(p0,p1) __arm_vqrshlq_n(p0,p1) -#define __arm_vqrshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) - #define vqrshlq(p0,p1) __arm_vqrshlq(p0,p1) #define __arm_vqrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -5111,15 +6988,13 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) - -#define vqrdmulhq_n(p0,p1) __arm_vqrdmulhq_n(p0,p1) -#define __arm_vqrdmulhq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqrdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqrdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqrdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vqrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) #define vqrdmulhq(p0,p1) __arm_vqrdmulhq(p0,p1) #define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ @@ -5127,26 +7002,24 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) - -#define vqdmulhq_n(p0,p1) __arm_vqdmulhq_n(p0,p1) -#define __arm_vqdmulhq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqrdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqrdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqrdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) #define vqdmulhq(p0,p1) __arm_vqdmulhq(p0,p1) #define __arm_vqdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) -#define vqaddq_n(p0,p1) __arm_vqaddq_n(p0,p1) -#define __arm_vqaddq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ +#define vqaddq(p0,p1) __arm_vqaddq(p0,p1) +#define __arm_vqaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ @@ -5154,12 +7027,7 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));}) - -#define vqaddq(p0,p1) __arm_vqaddq(p0,p1) -#define __arm_vqaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqaddq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqaddq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqaddq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -5167,15 +7035,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) -#define vorrq_n(p0,p1) __arm_vorrq_n(p0,p1) -#define __arm_vorrq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32_t]: __arm_vorrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vorrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32_t]: __arm_vorrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32_t]: __arm_vorrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int)));}) - #define vorrq(p0,p1) __arm_vorrq(p0,p1) #define __arm_vorrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -5291,8 +7150,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxaq_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxaq_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) -#define vhsubq_n(p0,p1) __arm_vhsubq_n(p0,p1) -#define __arm_vhsubq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ +#define vhsubq(p0,p1) __arm_vhsubq(p0,p1) +#define __arm_vhsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ @@ -5300,12 +7159,7 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhsubq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhsubq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));}) - -#define vhsubq(p0,p1) __arm_vhsubq(p0,p1) -#define __arm_vhsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhsubq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhsubq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhsubq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhsubq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -5329,8 +7183,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot270_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot270_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) -#define vhaddq_n(p0,p1) __arm_vhaddq_n(p0,p1) -#define __arm_vhaddq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ +#define vhaddq(p0,p1) __arm_vhaddq(p0,p1) +#define __arm_vhaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ @@ -5338,12 +7192,7 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));}) - -#define vhaddq(p0,p1) __arm_vhaddq(p0,p1) -#define __arm_vhaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhaddq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhaddq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhaddq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -5414,12 +7263,7 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) - -#define vaddq(p0,p1) __arm_vaddq(p0,p1) -#define __arm_vaddq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ @@ -5591,6 +7435,129 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));}) + +#define vqmovntq(p0,p1) __arm_vqmovntq(p0,p1) +#define __arm_vqmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovntq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovntq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqmovntq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqmovntq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vqmovnbq(p0,p1) __arm_vqmovnbq(p0,p1) +#define __arm_vqmovnbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovnbq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovnbq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqmovnbq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqmovnbq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmulltq_poly(p0,p1) __arm_vmulltq_poly(p0,p1) +#define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));}) + +#define vmullbq_poly(p0,p1) __arm_vmullbq_poly(p0,p1) +#define __arm_vmullbq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));}) + +#define vmovntq(p0,p1) __arm_vmovntq(p0,p1) +#define __arm_vmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vmovntq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vmovntq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vmovntq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vmovntq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmovnbq(p0,p1) __arm_vmovnbq(p0,p1) +#define __arm_vmovnbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vmovnbq_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vmovnbq_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vmovnbq_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vmovnbq_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vmlaldavxq(p0,p1) __arm_vmlaldavxq(p0,p1) +#define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqmovuntq(p0,p1) __arm_vqmovuntq(p0,p1) +#define __arm_vqmovuntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovuntq_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovuntq_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vshlltq(p0,p1) __arm_vshlltq(p0,p1) +#define __arm_vshlltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlltq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlltq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlltq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1));}) + +#define vshllbq(p0,p1) __arm_vshllbq(p0,p1) +#define __arm_vshllbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8x16_t]: __arm_vshllbq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ + int (*)[__ARM_mve_type_int16x8_t]: __arm_vshllbq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ + int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshllbq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ + int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshllbq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1));}) + +#define vmlaldavq(p0,p1) __arm_vmlaldavq(p0,p1) +#define __arm_vmlaldavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vqmovunbq(p0,p1) __arm_vqmovunbq(p0,p1) +#define __arm_vqmovunbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovunbq_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovunbq_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqdmulltq(p0,p1) __arm_vqdmulltq(p0,p1) +#define __arm_vqdmulltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vqdmullbq(p0,p1) __arm_vqdmullbq(p0,p1) +#define __arm_vqdmullbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmullbq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmullbq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vcmpgeq_n(p0,p1) __arm_vcmpgeq_n(p0,p1) +#define __arm_vcmpgeq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vcmpgeq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpgeq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpgeq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) + #define vcmpgeq(p0,p1) __arm_vcmpgeq(p0,p1) #define __arm_vcmpgeq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -5646,6 +7613,46 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vcmpltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vcmpltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) +#define vaddlvaq(p0,p1) __arm_vaddlvaq(p0,p1) +#define __arm_vaddlvaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int64_t][__ARM_mve_type_int32x4_t]: __arm_vaddlvaq_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint64_t][__ARM_mve_type_uint32x4_t]: __arm_vaddlvaq_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vrmlaldavhq(p0,p1) __arm_vrmlaldavhq(p0,p1) +#define __arm_vrmlaldavhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + +#define vrmlaldavhxq(p0,p1) __arm_vrmlaldavhxq(p0,p1) +#define __arm_vrmlaldavhxq(p0,p1) __arm_vrmlaldavhxq_s32(p0,p1) + +#define vrmlsldavhq(p0,p1) __arm_vrmlsldavhq(p0,p1) +#define __arm_vrmlsldavhq(p0,p1) __arm_vrmlsldavhq_s32(p0,p1) + +#define vrmlsldavhq(p0,p1) __arm_vrmlsldavhq(p0,p1) +#define __arm_vrmlsldavhq(p0,p1) __arm_vrmlsldavhq_s32(p0,p1) + +#define vrmlsldavhxq(p0,p1) __arm_vrmlsldavhxq(p0,p1) +#define __arm_vrmlsldavhxq(p0,p1) __arm_vrmlsldavhxq_s32(p0,p1) + +#define vmlsldavxq(p0,p1) __arm_vmlsldavxq(p0,p1) +#define __arm_vmlsldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + +#define vmlsldavq(p0,p1) __arm_vmlsldavq(p0,p1) +#define __arm_vmlsldavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + #endif /* MVE Floating point. */ #ifdef __cplusplus diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 550a67f920b..7129b99bc99 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -18,198 +18,276 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -VAR5 (STORE1, vst4q, v16qi, v8hi, v4si, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vrndxq_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vrndq_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vrndpq_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vrndnq_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vrndmq_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vrndaq_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vrev64q_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vnegq_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vdupq_n_f, v8hf, v4sf) -VAR2 (UNOP_NONE_NONE, vabsq_f, v8hf, v4sf) -VAR1 (UNOP_NONE_NONE, vrev32q_f, v8hf) -VAR1 (UNOP_NONE_NONE, vcvttq_f32_f16, v4sf) -VAR1 (UNOP_NONE_NONE, vcvtbq_f32_f16, v4sf) -VAR2 (UNOP_NONE_SNONE, vcvtq_to_f_s, v8hf, v4sf) -VAR2 (UNOP_NONE_UNONE, vcvtq_to_f_u, v8hf, v4sf) -VAR3 (UNOP_SNONE_SNONE, vrev64q_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vqnegq_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vqabsq_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vnegq_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vmvnq_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vdupq_n_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vclzq_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vclsq_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vaddvq_s, v16qi, v8hi, v4si) -VAR3 (UNOP_SNONE_SNONE, vabsq_s, v16qi, v8hi, v4si) -VAR2 (UNOP_SNONE_SNONE, vrev32q_s, v16qi, v8hi) -VAR2 (UNOP_SNONE_SNONE, vmovltq_s, v16qi, v8hi) -VAR2 (UNOP_SNONE_SNONE, vmovlbq_s, v16qi, v8hi) -VAR2 (UNOP_SNONE_NONE, vcvtq_from_f_s, v8hi, v4si) -VAR2 (UNOP_SNONE_NONE, vcvtpq_s, v8hi, v4si) -VAR2 (UNOP_SNONE_NONE, vcvtnq_s, v8hi, v4si) -VAR2 (UNOP_SNONE_NONE, vcvtmq_s, v8hi, v4si) -VAR2 (UNOP_SNONE_NONE, vcvtaq_s, v8hi, v4si) -VAR2 (UNOP_SNONE_IMM, vmvnq_n_s, v8hi, v4si) -VAR1 (UNOP_SNONE_SNONE, vrev16q_s, v16qi) -VAR1 (UNOP_SNONE_SNONE, vaddlvq_s, v4si) -VAR3 (UNOP_UNONE_UNONE, vrev64q_u, v16qi, v8hi, v4si) -VAR3 (UNOP_UNONE_UNONE, vmvnq_u, v16qi, v8hi, v4si) -VAR3 (UNOP_UNONE_UNONE, vdupq_n_u, v16qi, v8hi, v4si) -VAR3 (UNOP_UNONE_UNONE, vclzq_u, v16qi, v8hi, v4si) -VAR3 (UNOP_UNONE_UNONE, vaddvq_u, v16qi, v8hi, v4si) -VAR2 (UNOP_UNONE_UNONE, vrev32q_u, v16qi, v8hi) -VAR2 (UNOP_UNONE_UNONE, vmovltq_u, v16qi, v8hi) -VAR2 (UNOP_UNONE_UNONE, vmovlbq_u, v16qi, v8hi) -VAR2 (UNOP_UNONE_NONE, vcvtq_from_f_u, v8hi, v4si) -VAR2 (UNOP_UNONE_NONE, vcvtpq_u, v8hi, v4si) -VAR2 (UNOP_UNONE_NONE, vcvtnq_u, v8hi, v4si) -VAR2 (UNOP_UNONE_NONE, vcvtmq_u, v8hi, v4si) -VAR2 (UNOP_UNONE_NONE, vcvtaq_u, v8hi, v4si) -VAR2 (UNOP_UNONE_IMM, vmvnq_n_u, v8hi, v4si) -VAR1 (UNOP_UNONE_UNONE, vrev16q_u, v16qi) -VAR1 (UNOP_UNONE_UNONE, vaddlvq_u, v4si) -VAR1 (UNOP_UNONE_UNONE, vctp16q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp32q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp64q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp8q, hi) -VAR1 (UNOP_UNONE_UNONE, vpnot, hi) -VAR2 (BINOP_NONE_NONE_NONE, vsubq_n_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vbrsrq_n_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_IMM, vcvtq_n_to_f_s, v8hf, v4sf) -VAR2 (BINOP_NONE_UNONE_IMM, vcvtq_n_to_f_u, v8hf, v4sf) -VAR2 (BINOP_NONE_UNONE_UNONE, vcreateq_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_IMM, vcvtq_n_from_f_u, v8hi, v4si) -VAR2 (BINOP_NONE_NONE_IMM, vcvtq_n_from_f_s, v8hi, v4si) -VAR4 (BINOP_UNONE_UNONE_UNONE, vcreateq_u, v16qi, v8hi, v4si, v2di) -VAR4 (BINOP_NONE_UNONE_UNONE, vcreateq_s, v16qi, v8hi, v4si, v2di) -VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si) -VAR1 (BINOP_NONE_NONE_UNONE, vaddlvq_p_s, v4si) -VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvq_p_u, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpneq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vshlq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vshlq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vsubq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vsubq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vrmulhq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vrhaddq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vqsubq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vqsubq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vqaddq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vqaddq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vorrq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vornq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmulq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmulq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmulltq_int_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmullbq_int_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmulhq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmladavq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vminvq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vminq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmaxvq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vmaxq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vhsubq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vhsubq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpneq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvaq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vaddq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vabdq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vshlq_r_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vrshlq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vrshlq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vqshlq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vqshlq_r_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vqrshlq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vqrshlq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vminavq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vminaq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vmaxavq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vmaxaq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_NONE, vbrsrq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_IMM, vqshluq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_UNONE, vaddvq_p_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vsubq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vsubq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vshlq_r_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vrshlq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vrshlq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vrmulhq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vrhaddq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqsubq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqsubq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqshlq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqshlq_r_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqrshlq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqrshlq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqrdmulhq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqrdmulhq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqdmulhq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqdmulhq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqaddq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vqaddq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vorrq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vornq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmulq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmulq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmulltq_int_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmullbq_int_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmulhq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmlsdavxq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmlsdavq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmladavxq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmladavq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vminvq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vminq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmaxvq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vmaxq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vhsubq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vhsubq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot90_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vaddvaq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vaddq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vabdq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_IMM, vshlq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_IMM, vrshrq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_IMM, vqshlq_n_s, v16qi, v8hi, v4si) +VAR5(STORE1, vst4q, v16qi, v8hi, v4si, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vrndxq_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vrndq_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vrndpq_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vrndnq_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vrndmq_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vrndaq_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vrev64q_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vnegq_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vdupq_n_f, v8hf, v4sf) +VAR2(UNOP_NONE_NONE, vabsq_f, v8hf, v4sf) +VAR1(UNOP_NONE_NONE, vrev32q_f, v8hf) +VAR1(UNOP_NONE_NONE, vcvttq_f32_f16, v4sf) +VAR1(UNOP_NONE_NONE, vcvtbq_f32_f16, v4sf) +VAR2(UNOP_NONE_SNONE, vcvtq_to_f_s, v8hf, v4sf) +VAR2(UNOP_NONE_UNONE, vcvtq_to_f_u, v8hf, v4sf) +VAR3(UNOP_SNONE_SNONE, vrev64q_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vqnegq_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vqabsq_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vnegq_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vmvnq_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vdupq_n_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vclzq_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vclsq_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vaddvq_s, v16qi, v8hi, v4si) +VAR3(UNOP_SNONE_SNONE, vabsq_s, v16qi, v8hi, v4si) +VAR2(UNOP_SNONE_SNONE, vrev32q_s, v16qi, v8hi) +VAR2(UNOP_SNONE_SNONE, vmovltq_s, v16qi, v8hi) +VAR2(UNOP_SNONE_SNONE, vmovlbq_s, v16qi, v8hi) +VAR2(UNOP_SNONE_NONE, vcvtq_from_f_s, v8hi, v4si) +VAR2(UNOP_SNONE_NONE, vcvtpq_s, v8hi, v4si) +VAR2(UNOP_SNONE_NONE, vcvtnq_s, v8hi, v4si) +VAR2(UNOP_SNONE_NONE, vcvtmq_s, v8hi, v4si) +VAR2(UNOP_SNONE_NONE, vcvtaq_s, v8hi, v4si) +VAR2(UNOP_SNONE_IMM, vmvnq_n_s, v8hi, v4si) +VAR1(UNOP_SNONE_SNONE, vrev16q_s, v16qi) +VAR1(UNOP_SNONE_SNONE, vaddlvq_s, v4si) +VAR3(UNOP_UNONE_UNONE, vrev64q_u, v16qi, v8hi, v4si) +VAR3(UNOP_UNONE_UNONE, vmvnq_u, v16qi, v8hi, v4si) +VAR3(UNOP_UNONE_UNONE, vdupq_n_u, v16qi, v8hi, v4si) +VAR3(UNOP_UNONE_UNONE, vclzq_u, v16qi, v8hi, v4si) +VAR3(UNOP_UNONE_UNONE, vaddvq_u, v16qi, v8hi, v4si) +VAR2(UNOP_UNONE_UNONE, vrev32q_u, v16qi, v8hi) +VAR2(UNOP_UNONE_UNONE, vmovltq_u, v16qi, v8hi) +VAR2(UNOP_UNONE_UNONE, vmovlbq_u, v16qi, v8hi) +VAR2(UNOP_UNONE_NONE, vcvtq_from_f_u, v8hi, v4si) +VAR2(UNOP_UNONE_NONE, vcvtpq_u, v8hi, v4si) +VAR2(UNOP_UNONE_NONE, vcvtnq_u, v8hi, v4si) +VAR2(UNOP_UNONE_NONE, vcvtmq_u, v8hi, v4si) +VAR2(UNOP_UNONE_NONE, vcvtaq_u, v8hi, v4si) +VAR2(UNOP_UNONE_IMM, vmvnq_n_u, v8hi, v4si) +VAR1(UNOP_UNONE_UNONE, vrev16q_u, v16qi) +VAR1(UNOP_UNONE_UNONE, vaddlvq_u, v4si) +VAR1(UNOP_UNONE_UNONE, vctp16q, hi) +VAR1(UNOP_UNONE_UNONE, vctp32q, hi) +VAR1(UNOP_UNONE_UNONE, vctp64q, hi) +VAR1(UNOP_UNONE_UNONE, vctp8q, hi) +VAR1(UNOP_UNONE_UNONE, vpnot, hi) +VAR2(BINOP_NONE_NONE_NONE, vsubq_n_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vbrsrq_n_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_IMM, vcvtq_n_to_f_s, v8hf, v4sf) +VAR2(BINOP_NONE_UNONE_IMM, vcvtq_n_to_f_u, v8hf, v4sf) +VAR2(BINOP_NONE_UNONE_UNONE, vcreateq_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_IMM, vcvtq_n_from_f_u, v8hi, v4si) +VAR2(BINOP_NONE_NONE_IMM, vcvtq_n_from_f_s, v8hi, v4si) +VAR4(BINOP_UNONE_UNONE_UNONE, vcreateq_u, v16qi, v8hi, v4si, v2di) +VAR4(BINOP_NONE_UNONE_UNONE, vcreateq_s, v16qi, v8hi, v4si, v2di) +VAR3(BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si) +VAR1(BINOP_NONE_NONE_UNONE, vaddlvq_p_s, v4si) +VAR1(BINOP_UNONE_UNONE_UNONE, vaddlvq_p_u, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpneq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmpneq_u, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vshlq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vshlq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vsubq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vsubq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vrmulhq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vrhaddq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vqsubq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vqsubq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vqaddq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vqaddq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vorrq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vornq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmulq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmulq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmulltq_int_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmullbq_int_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmulhq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmladavq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vminvq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vminq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmaxvq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vmaxq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vhsubq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vhsubq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmpneq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmphiq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmphiq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vaddvaq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vaddq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_UNONE, vabdq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vshlq_r_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vrshlq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vrshlq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vqshlq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vqshlq_r_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vqrshlq_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vqrshlq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vminavq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vminaq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vmaxavq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vmaxaq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_NONE, vbrsrq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpneq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpltq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpltq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpleq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpleq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpgtq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpgtq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpgeq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpgeq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpeqq_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_NONE, vcmpeqq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_UNONE_NONE_IMM, vqshluq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_UNONE, vaddvq_p_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vsubq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vsubq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vshlq_r_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vrshlq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vrshlq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vrmulhq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vrhaddq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqsubq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqsubq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqshlq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqshlq_r_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqrshlq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqrshlq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqrdmulhq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqrdmulhq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqdmulhq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqdmulhq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqaddq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vqaddq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vorrq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vornq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmulq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmulq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmulltq_int_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmullbq_int_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmulhq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmlsdavxq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmlsdavq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmladavxq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmladavq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vminvq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vminq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmaxvq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vmaxq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vhsubq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vhsubq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vhcaddq_rot90_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vaddvaq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vaddq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_NONE, vabdq_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_IMM, vshlq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_IMM, vrshrq_n_s, v16qi, v8hi, v4si) +VAR3(BINOP_NONE_NONE_IMM, vqshlq_n_s, v16qi, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_UNONE, vqmovntq_u, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_UNONE, vqmovnbq_u, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_UNONE, vmulltq_poly_p, v16qi, v8hi) +VAR2(BINOP_UNONE_UNONE_UNONE, vmullbq_poly_p, v16qi, v8hi) +VAR2(BINOP_UNONE_UNONE_UNONE, vmovntq_u, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_UNONE, vmovnbq_u, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_UNONE, vmlaldavq_u, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_NONE, vqmovuntq_s, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_NONE, vqmovunbq_s, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_IMM, vshlltq_n_u, v16qi, v8hi) +VAR2(BINOP_UNONE_UNONE_IMM, vshllbq_n_u, v16qi, v8hi) +VAR2(BINOP_UNONE_UNONE_IMM, vorrq_n_u, v8hi, v4si) +VAR2(BINOP_UNONE_UNONE_IMM, vbicq_n_u, v8hi, v4si) +VAR2(BINOP_UNONE_NONE_NONE, vcmpneq_n_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpneq_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpltq_n_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpltq_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpleq_n_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpleq_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpgtq_n_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpgtq_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpgeq_n_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpgeq_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpeqq_n_f, v8hf, v4sf) +VAR2(BINOP_UNONE_NONE_NONE, vcmpeqq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vsubq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vqmovntq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vqmovnbq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vqdmulltq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vqdmulltq_n_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vqdmullbq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vqdmullbq_n_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vorrq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vornq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vmulq_n_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vmulq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vmovntq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vmovnbq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vmlsldavxq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vmlsldavq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vmlaldavxq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vmlaldavq_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_NONE, vminnmvq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vminnmq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vminnmavq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vminnmaq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vmaxnmvq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vmaxnmq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vmaxnmavq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vmaxnmaq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, veorq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_NONE, vabdq_f, v8hf, v4sf) +VAR2(BINOP_NONE_NONE_IMM, vshlltq_n_s, v16qi, v8hi) +VAR2(BINOP_NONE_NONE_IMM, vshllbq_n_s, v16qi, v8hi) +VAR2(BINOP_NONE_NONE_IMM, vorrq_n_s, v8hi, v4si) +VAR2(BINOP_NONE_NONE_IMM, vbicq_n_s, v8hi, v4si) +VAR1(BINOP_UNONE_UNONE_UNONE, vrmlaldavhq_u, v4si) +VAR1(BINOP_UNONE_UNONE_UNONE, vctp8q_m, hi) +VAR1(BINOP_UNONE_UNONE_UNONE, vctp64q_m, hi) +VAR1(BINOP_UNONE_UNONE_UNONE, vctp32q_m, hi) +VAR1(BINOP_UNONE_UNONE_UNONE, vctp16q_m, hi) +VAR1(BINOP_UNONE_UNONE_UNONE, vaddlvaq_u, v4si) +VAR1(BINOP_NONE_NONE_NONE, vrmlsldavhxq_s, v4si) +VAR1(BINOP_NONE_NONE_NONE, vrmlsldavhq_s, v4si) +VAR1(BINOP_NONE_NONE_NONE, vrmlaldavhxq_s, v4si) +VAR1(BINOP_NONE_NONE_NONE, vrmlaldavhq_s, v4si) +VAR1(BINOP_NONE_NONE_NONE, vcvttq_f16_f32, v8hf) +VAR1(BINOP_NONE_NONE_NONE, vcvtbq_f16_f32, v8hf) +VAR1(BINOP_NONE_NONE_NONE, vaddlvaq_s, v4si) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 8e817b890da..24fb8167d6a 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -66,7 +66,26 @@ VMLADAVXQ_S VMLSDAVQ_S VMLSDAVXQ_S VQDMULHQ_N_S VQDMULHQ_S VQRDMULHQ_N_S VQRDMULHQ_S VQSHLUQ_N_S VCMPCSQ_N_U VCMPCSQ_U VCMPHIQ_N_U VCMPHIQ_U VABDQ_M_S - VABDQ_M_U]) + VABDQ_M_U VABDQ_F VADDQ_N_F VANDQ_F VBICQ_F + VCADDQ_ROT270_F VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F + VCMPGEQ_F VCMPGEQ_N_F VCMPGTQ_F VCMPGTQ_N_F VCMPLEQ_F + VCMPLEQ_N_F VCMPLTQ_F VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F + VCMULQ_F VCMULQ_ROT180_F VCMULQ_ROT270_F VCMULQ_ROT90_F + VEORQ_F VMAXNMAQ_F VMAXNMAVQ_F VMAXNMQ_F VMAXNMVQ_F + VMINNMAQ_F VMINNMAVQ_F VMINNMQ_F VMINNMVQ_F VMULQ_F + VMULQ_N_F VORNQ_F VORRQ_F VSUBQ_F VADDLVAQ_U + VADDLVAQ_S VBICQ_N_U VBICQ_N_S VCTP8Q_M VCTP16Q_M + VCTP32Q_M VCTP64Q_M VCVTBQ_F16_F32 VCVTTQ_F16_F32 + VMLALDAVQ_U VMLALDAVXQ_U VMLALDAVXQ_S VMLALDAVQ_S + VMLSLDAVQ_S VMLSLDAVXQ_S VMOVNBQ_U VMOVNBQ_S + VMOVNTQ_U VMOVNTQ_S VORRQ_N_S VORRQ_N_U VQDMULLBQ_N_S + VQDMULLBQ_S VQDMULLTQ_N_S VQDMULLTQ_S VQMOVNBQ_U + VQMOVNBQ_S VQMOVUNBQ_S VQMOVUNTQ_S VRMLALDAVHXQ_S + VRMLSLDAVHQ_S VRMLSLDAVHXQ_S VSHLLBQ_S + VSHLLBQ_U VSHLLTQ_U VSHLLTQ_S VQMOVNTQ_U VQMOVNTQ_S + VSHLLBQ_N_S VSHLLBQ_N_U VSHLLTQ_N_U VSHLLTQ_N_S + VRMLALDAVHQ_U VRMLALDAVHQ_S VMULLTQ_POLY_P + VMULLBQ_POLY_P]) (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") (V4SF "V4SI")]) @@ -119,10 +138,19 @@ (VSHLQ_N_S "s") (VSHLQ_N_U "u") (VSHLQ_R_S "s") (VSHLQ_R_U "u") (VSUBQ_N_S "s") (VSUBQ_N_U "u") (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s") - (VADDVAQ_U "u")]) + (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u") + (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u") + (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s") + (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u") + (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u") + (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s") + (VQMOVNTQ_U "u") (VSHLLBQ_N_U "u") (VSHLLBQ_N_S "s") + (VSHLLTQ_N_U "u") (VSHLLTQ_N_S "s") (VRMLALDAVHQ_U "u") + (VRMLALDAVHQ_S "s")]) (define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") - (VCTP64Q "64")]) + (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") + (VCTP32Q_M "32") (VCTP64Q_M "64")]) (define_mode_attr MVE_pred2 [(V16QI "mve_imm_8") (V8HI "mve_imm_16") (V4SI "mve_imm_32")]) (define_mode_attr MVE_constraint2 [(V16QI "Rb") (V8HI "Rd") (V4SI "Rf")]) @@ -146,6 +174,7 @@ (define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U]) (define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S]) (define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q]) +(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M]) (define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U]) (define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) (define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) @@ -200,7 +229,18 @@ (define_int_iterator VSHLQ_R [VSHLQ_R_S VSHLQ_R_U]) (define_int_iterator VSUBQ [VSUBQ_S VSUBQ_U]) (define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U]) - +(define_int_iterator VADDLVAQ [VADDLVAQ_S VADDLVAQ_U]) +(define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U]) +(define_int_iterator VMLALDAVQ [VMLALDAVQ_U VMLALDAVQ_S]) +(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S]) +(define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S]) +(define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U]) +(define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S]) +(define_int_iterator VQMOVNBQ [VQMOVNBQ_U VQMOVNBQ_S]) +(define_int_iterator VQMOVNTQ [VQMOVNTQ_U VQMOVNTQ_S]) +(define_int_iterator VSHLLBQ_N [VSHLLBQ_N_S VSHLLBQ_N_U]) +(define_int_iterator VSHLLTQ_N [VSHLLTQ_N_U VSHLLTQ_N_S]) +(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S]) (define_insn "*mve_mov<mode>" [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Us") @@ -2057,3 +2097,963 @@ "vsub.i%#<V_sz_elem>\t%q0, %q1, %q2" [(set_attr "type" "mve_move") ]) + +;; +;; [vabdq_f]) +;; +(define_insn "mve_vabdq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VABDQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vabd.f%#<V_sz_elem> %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vaddlvaq_s vaddlvaq_u]) +;; +(define_insn "mve_vaddlvaq_<supf>v4si" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") + (match_operand:V4SI 2 "s_register_operand" "w")] + VADDLVAQ)) + ] + "TARGET_HAVE_MVE" + "vaddlva.<supf>32 %Q0, %R0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vaddq_n_f]) +;; +(define_insn "mve_vaddq_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VADDQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vadd.f%#<V_sz_elem> %q0, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vandq_f]) +;; +(define_insn "mve_vandq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VANDQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vand %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vbicq_f]) +;; +(define_insn "mve_vbicq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VBICQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vbic %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vbicq_n_s, vbicq_n_u]) +;; +(define_insn "mve_vbicq_n_<supf><mode>" + [ + (set (match_operand:MVE_5 0 "s_register_operand" "=w") + (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + VBICQ_N)) + ] + "TARGET_HAVE_MVE" + "vbic.i%#<V_sz_elem> %q0, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcaddq_rot270_f]) +;; +(define_insn "mve_vcaddq_rot270_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCADDQ_ROT270_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcaddq_rot90_f]) +;; +(define_insn "mve_vcaddq_rot90_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCADDQ_ROT90_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpeqq_f]) +;; +(define_insn "mve_vcmpeqq_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMPEQQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> eq, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpeqq_n_f]) +;; +(define_insn "mve_vcmpeqq_n_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VCMPEQQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> eq, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpgeq_f]) +;; +(define_insn "mve_vcmpgeq_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMPGEQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> ge, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpgeq_n_f]) +;; +(define_insn "mve_vcmpgeq_n_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VCMPGEQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> ge, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpgtq_f]) +;; +(define_insn "mve_vcmpgtq_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMPGTQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> gt, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpgtq_n_f]) +;; +(define_insn "mve_vcmpgtq_n_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VCMPGTQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> gt, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpleq_f]) +;; +(define_insn "mve_vcmpleq_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMPLEQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> le, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpleq_n_f]) +;; +(define_insn "mve_vcmpleq_n_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VCMPLEQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> le, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpltq_f]) +;; +(define_insn "mve_vcmpltq_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMPLTQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> lt, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpltq_n_f]) +;; +(define_insn "mve_vcmpltq_n_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VCMPLTQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> lt, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpneq_f]) +;; +(define_insn "mve_vcmpneq_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMPNEQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> ne, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmpneq_n_f]) +;; +(define_insn "mve_vcmpneq_n_f<mode>" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VCMPNEQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmp.f%#<V_sz_elem> ne, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmulq_f]) +;; +(define_insn "mve_vcmulq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMULQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #0" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmulq_rot180_f]) +;; +(define_insn "mve_vcmulq_rot180_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMULQ_ROT180_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #180" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmulq_rot270_f]) +;; +(define_insn "mve_vcmulq_rot270_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMULQ_ROT270_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #270" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcmulq_rot90_f]) +;; +(define_insn "mve_vcmulq_rot90_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VCMULQ_ROT90_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #90" + [(set_attr "type" "mve_move") +]) + +;; +;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m]) +;; +(define_insn "mve_vctp<mode1>q_mhi" + [ + (set (match_operand:HI 0 "vpr_register_operand" "=Up") + (unspec:HI [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:HI 2 "vpr_register_operand" "Up")] + VCTPQ_M)) + ] + "TARGET_HAVE_MVE" + "vpst\;vctpt.<mode1> %1" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcvtbq_f16_f32]) +;; +(define_insn "mve_vcvtbq_f16_f32v8hf" + [ + (set (match_operand:V8HF 0 "s_register_operand" "=w") + (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0") + (match_operand:V4SF 2 "s_register_operand" "w")] + VCVTBQ_F16_F32)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcvtb.f16.f32 %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vcvttq_f16_f32]) +;; +(define_insn "mve_vcvttq_f16_f32v8hf" + [ + (set (match_operand:V8HF 0 "s_register_operand" "=w") + (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0") + (match_operand:V4SF 2 "s_register_operand" "w")] + VCVTTQ_F16_F32)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vcvtt.f16.f32 %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [veorq_f]) +;; +(define_insn "mve_veorq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VEORQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "veor %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmaxnmaq_f]) +;; +(define_insn "mve_vmaxnmaq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMAXNMAQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vmaxnma.f%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmaxnmavq_f]) +;; +(define_insn "mve_vmaxnmavq_f<mode>" + [ + (set (match_operand:<V_elem> 0 "s_register_operand" "=r") + (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMAXNMAVQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vmaxnmav.f%#<V_sz_elem> %0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmaxnmq_f]) +;; +(define_insn "mve_vmaxnmq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMAXNMQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vmaxnm.f%#<V_sz_elem> %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmaxnmvq_f]) +;; +(define_insn "mve_vmaxnmvq_f<mode>" + [ + (set (match_operand:<V_elem> 0 "s_register_operand" "=r") + (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMAXNMVQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vmaxnmv.f%#<V_sz_elem> %0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vminnmaq_f]) +;; +(define_insn "mve_vminnmaq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMINNMAQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vminnma.f%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vminnmavq_f]) +;; +(define_insn "mve_vminnmavq_f<mode>" + [ + (set (match_operand:<V_elem> 0 "s_register_operand" "=r") + (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMINNMAVQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vminnmav.f%#<V_sz_elem> %0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vminnmq_f]) +;; +(define_insn "mve_vminnmq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMINNMQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vminnm.f%#<V_sz_elem> %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vminnmvq_f]) +;; +(define_insn "mve_vminnmvq_f<mode>" + [ + (set (match_operand:<V_elem> 0 "s_register_operand" "=r") + (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMINNMVQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vminnmv.f%#<V_sz_elem> %0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmlaldavq_u, vmlaldavq_s]) +;; +(define_insn "mve_vmlaldavq_<supf><mode>" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VMLALDAVQ)) + ] + "TARGET_HAVE_MVE" + "vmlaldav.<supf>%#<V_sz_elem> %Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmlaldavxq_s]) +;; +(define_insn "mve_vmlaldavxq_s<mode>" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VMLALDAVXQ_S)) + ] + "TARGET_HAVE_MVE" + "vmlaldavx.s%#<V_sz_elem> %Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmlsldavq_s]) +;; +(define_insn "mve_vmlsldavq_s<mode>" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VMLSLDAVQ_S)) + ] + "TARGET_HAVE_MVE" + "vmlsldav.s%#<V_sz_elem> %Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmlsldavxq_s]) +;; +(define_insn "mve_vmlsldavxq_s<mode>" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VMLSLDAVXQ_S)) + ] + "TARGET_HAVE_MVE" + "vmlsldavx.s%#<V_sz_elem> %Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmovnbq_u, vmovnbq_s]) +;; +(define_insn "mve_vmovnbq_<supf><mode>" + [ + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") + (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VMOVNBQ)) + ] + "TARGET_HAVE_MVE" + "vmovnb.i%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmovntq_s, vmovntq_u]) +;; +(define_insn "mve_vmovntq_<supf><mode>" + [ + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") + (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VMOVNTQ)) + ] + "TARGET_HAVE_MVE" + "vmovnt.i%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmulq_f]) +;; +(define_insn "mve_vmulq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VMULQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vmul.f%#<V_sz_elem> %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmulq_n_f]) +;; +(define_insn "mve_vmulq_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VMULQ_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vmul.f%#<V_sz_elem> %q0, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vornq_f]) +;; +(define_insn "mve_vornq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VORNQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vorn %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vorrq_f]) +;; +(define_insn "mve_vorrq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VORRQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vorr %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vorrq_n_u, vorrq_n_s]) +;; +(define_insn "mve_vorrq_n_<supf><mode>" + [ + (set (match_operand:MVE_5 0 "s_register_operand" "=w") + (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + VORRQ_N)) + ] + "TARGET_HAVE_MVE" + "vorr.i%#<V_sz_elem> %q0, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqdmullbq_n_s]) +;; +(define_insn "mve_vqdmullbq_n_s<mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VQDMULLBQ_N_S)) + ] + "TARGET_HAVE_MVE" + "vqdmullb.s%#<V_sz_elem> %q0, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqdmullbq_s]) +;; +(define_insn "mve_vqdmullbq_s<mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VQDMULLBQ_S)) + ] + "TARGET_HAVE_MVE" + "vqdmullb.s%#<V_sz_elem> %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqdmulltq_n_s]) +;; +(define_insn "mve_vqdmulltq_n_s<mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:<V_elem> 2 "s_register_operand" "r")] + VQDMULLTQ_N_S)) + ] + "TARGET_HAVE_MVE" + "vqdmullt.s%#<V_sz_elem> %q0, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqdmulltq_s]) +;; +(define_insn "mve_vqdmulltq_s<mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VQDMULLTQ_S)) + ] + "TARGET_HAVE_MVE" + "vqdmullt.s%#<V_sz_elem> %q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqmovnbq_u, vqmovnbq_s]) +;; +(define_insn "mve_vqmovnbq_<supf><mode>" + [ + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") + (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VQMOVNBQ)) + ] + "TARGET_HAVE_MVE" + "vqmovnb.<supf>%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqmovntq_u, vqmovntq_s]) +;; +(define_insn "mve_vqmovntq_<supf><mode>" + [ + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") + (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VQMOVNTQ)) + ] + "TARGET_HAVE_MVE" + "vqmovnt.<supf>%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqmovunbq_s]) +;; +(define_insn "mve_vqmovunbq_s<mode>" + [ + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") + (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VQMOVUNBQ_S)) + ] + "TARGET_HAVE_MVE" + "vqmovunb.s%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vqmovuntq_s]) +;; +(define_insn "mve_vqmovuntq_s<mode>" + [ + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") + (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") + (match_operand:MVE_5 2 "s_register_operand" "w")] + VQMOVUNTQ_S)) + ] + "TARGET_HAVE_MVE" + "vqmovunt.s%#<V_sz_elem> %q0, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vrmlaldavhxq_s]) +;; +(define_insn "mve_vrmlaldavhxq_sv4si" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VRMLALDAVHXQ_S)) + ] + "TARGET_HAVE_MVE" + "vrmlaldavhx.s32 %Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vrmlsldavhq_s]) +;; +(define_insn "mve_vrmlsldavhq_sv4si" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VRMLSLDAVHQ_S)) + ] + "TARGET_HAVE_MVE" + "vrmlsldavh.s32\t%Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vrmlsldavhxq_s]) +;; +(define_insn "mve_vrmlsldavhxq_sv4si" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VRMLSLDAVHXQ_S)) + ] + "TARGET_HAVE_MVE" + "vrmlsldavhx.s32\t%Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vshllbq_n_s, vshllbq_n_u]) +;; +(define_insn "mve_vshllbq_n_<supf><mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLLBQ_N)) + ] + "TARGET_HAVE_MVE" + "vshllb.<supf>%#<V_sz_elem>\t%q0, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vshlltq_n_u, vshlltq_n_s]) +;; +(define_insn "mve_vshlltq_n_<supf><mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLLTQ_N)) + ] + "TARGET_HAVE_MVE" + "vshllt.<supf>%#<V_sz_elem>\t%q0, %q1, %2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vsubq_f]) +;; +(define_insn "mve_vsubq_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w")] + VSUBQ_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vsub.f%#<V_sz_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmulltq_poly_p]) +;; +(define_insn "mve_vmulltq_poly_p<mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w") + (match_operand:MVE_3 2 "s_register_operand" "w")] + VMULLTQ_POLY_P)) + ] + "TARGET_HAVE_MVE" + "vmullt.p%#<V_sz_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vmullbq_poly_p]) +;; +(define_insn "mve_vmullbq_poly_p<mode>" + [ + (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") + (unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w") + (match_operand:MVE_3 2 "s_register_operand" "w")] + VMULLBQ_POLY_P)) + ] + "TARGET_HAVE_MVE" + "vmullb.p%#<V_sz_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +;; +;; [vrmlaldavhq_u vrmlaldavhq_s]) +;; +(define_insn "mve_vrmlaldavhq_<supf>v4si" + [ + (set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VRMLALDAVHQ)) + ] + "TARGET_HAVE_MVE" + "vrmlaldavh.<supf>32 %Q0, %R0, %q1, %q2" + [(set_attr "type" "mve_move") +]) |