diff options
author | Tamar Christina <tamar.christina@arm.com> | 2020-12-13 13:59:33 +0000 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2020-12-13 14:12:34 +0000 |
commit | 3b8a82f97dd48e153ce93b317c44254839e11461 (patch) | |
tree | 2f34b1dc783f32ac5c0d3d6dc6033d6f68e5d69e /gcc/config/arm/unspecs.md | |
parent | 2f05dadaeda6068ad570117be21f2c16e2f4fa10 (diff) | |
download | gcc-3b8a82f97dd48e153ce93b317c44254839e11461.tar.gz |
Arm: Add NEON and MVE RTL patterns for Complex Addition, Multiply and FMA.
This adds implementation for the optabs for complex additions. With this the
following C code:
void f90 (float complex a[restrict N], float complex b[restrict N],
float complex c[restrict N])
{
for (int i=0; i < N; i++)
c[i] = a[i] + (b[i] * I);
}
generates
f90:
add r3, r2, #1600
.L2:
vld1.32 {q8}, [r0]!
vld1.32 {q9}, [r1]!
vcadd.f32 q8, q8, q9, #90
vst1.32 {q8}, [r2]!
cmp r3, r2
bne .L2
bx lr
instead of
f90:
add r3, r2, #1600
.L2:
vld2.32 {d24-d27}, [r0]!
vld2.32 {d20-d23}, [r1]!
vsub.f32 q8, q12, q11
vadd.f32 q9, q13, q10
vst2.32 {d16-d19}, [r2]!
cmp r3, r2
bne .L2
bx lr
gcc/ChangeLog:
* config/arm/arm_mve.h (__arm_vcaddq_rot90_u8, __arm_vcaddq_rot270_u8,
, __arm_vcaddq_rot90_s8, __arm_vcaddq_rot270_s8,
__arm_vcaddq_rot90_u16, __arm_vcaddq_rot270_u16, __arm_vcaddq_rot90_s16,
__arm_vcaddq_rot270_s16, __arm_vcaddq_rot90_u32,
__arm_vcaddq_rot270_u32, __arm_vcaddq_rot90_s32,
__arm_vcaddq_rot270_s32, __arm_vcmulq_rot90_f16,
__arm_vcmulq_rot270_f16, __arm_vcmulq_rot180_f16,
__arm_vcmulq_f16, __arm_vcaddq_rot90_f16, __arm_vcaddq_rot270_f16,
__arm_vcmulq_rot90_f32, __arm_vcmulq_rot270_f32,
__arm_vcmulq_rot180_f32, __arm_vcmulq_f32, __arm_vcaddq_rot90_f32,
__arm_vcaddq_rot270_f32, __arm_vcmlaq_f16, __arm_vcmlaq_rot180_f16,
__arm_vcmlaq_rot270_f16, __arm_vcmlaq_rot90_f16, __arm_vcmlaq_f32,
__arm_vcmlaq_rot180_f32, __arm_vcmlaq_rot270_f32,
__arm_vcmlaq_rot90_f32): Update builtin calls.
* config/arm/arm_mve_builtins.def (vcaddq_rot90_u, vcaddq_rot270_u,
vcaddq_rot90_s, vcaddq_rot270_s, vcaddq_rot90_f, vcaddq_rot270_f,
vcmulq_f, vcmulq_rot90_f, vcmulq_rot180_f, vcmulq_rot270_f,
vcmlaq_f, vcmlaq_rot90_f, vcmlaq_rot180_f, vcmlaq_rot270_f): Removed.
(vcaddq_rot90, vcaddq_rot270, vcmulq, vcmulq_rot90, vcmulq_rot180,
vcmulq_rot270, vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270):
New.
* config/arm/constraints.md (Dz): Include MVE.
* config/arm/iterators.md (mve_rotsplit1, mve_rotsplit2): New.
(rot): Add UNSPEC_VCMLS, UNSPEC_VCMUL and UNSPEC_VCMUL180.
(rot_op, rotsplit1, rotsplit2, fcmac1, VCMLA_OP, VCMUL_OP): New.
* config/arm/mve.md (VCADDQ_ROT270_S, VCADDQ_ROT90_S, VCADDQ_ROT270_U,
VCADDQ_ROT90_U, VCADDQ_ROT270_F, VCADDQ_ROT90_F, VCMULQ_F,
VCMULQ_ROT180_F, VCMULQ_ROT270_F, VCMULQ_ROT90_F, VCMLAQ_F,
VCMLAQ_ROT180_F, VCMLAQ_ROT90_F, VCMLAQ_ROT270_F, VCADDQ_ROT270_S,
VCADDQ_ROT270, VCADDQ_ROT90): Removed.
(mve_rot, VCMUL): New.
(mve_vcaddq_rot270_<supf><mode, mve_vcaddq_rot90_<supf><mode>,
mve_vcaddq_rot270_f<mode>, mve_vcaddq_rot90_f<mode>, mve_vcmulq_f<mode,
mve_vcmulq_rot180_f<mode>, mve_vcmulq_rot270_f<mode>,
mve_vcmulq_rot90_f<mode>, mve_vcmlaq_f<mode>, mve_vcmlaq_rot180_f<mode>,
mve_vcmlaq_rot270_f<mode>, mve_vcmlaq_rot90_f<mode>): Removed.
(mve_vcmlaq<mve_rot><mode>, mve_vcmulq<mve_rot><mode>,
mve_vcaddq<mve_rot><mode>, cadd<rot><mode>3, mve_vcaddq<mve_rot><mode>):
New.
(cmul<rot_op><mode>3): Exclude MVE types.
* config/arm/unspecs.md (UNSPEC_VCMUL90, UNSPEC_VCMUL270): New.
* config/arm/vec-common.md (cadd<rot><mode>3, cmul<rot_op><mode>3,
arm_vcmla<rot><mode>, cml<fcmac1><rot_op><mode>4): New.
* config/arm/unspecs.md (UNSPEC_VCMUL, UNSPEC_VCMUL180, UNSPEC_VCMLS,
UNSPEC_VCMLS180): New.
* config/arm/neon.md (cmul<rot_op><mode>3): New.
Diffstat (limited to 'gcc/config/arm/unspecs.md')
-rw-r--r-- | gcc/config/arm/unspecs.md | 20 |
1 files changed, 6 insertions, 14 deletions
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index c2076c9ce6f..8bb00602103 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -510,6 +510,12 @@ UNSPEC_VCMLA90 UNSPEC_VCMLA180 UNSPEC_VCMLA270 + UNSPEC_VCMUL + UNSPEC_VCMUL90 + UNSPEC_VCMUL180 + UNSPEC_VCMUL270 + UNSPEC_VCMLS + UNSPEC_VCMLS180 UNSPEC_MATMUL_S UNSPEC_MATMUL_U UNSPEC_MATMUL_US @@ -603,8 +609,6 @@ VADDVQ_P_S VBICQ_S VBRSRQ_N_S - VCADDQ_ROT270_S - VCADDQ_ROT90_S VCMPEQQ_S VCMPEQQ_N_S VCMPNEQ_N_S @@ -648,8 +652,6 @@ VADDVQ_P_U VBICQ_U VBRSRQ_N_U - VCADDQ_ROT270_U - VCADDQ_ROT90_U VCMPEQQ_U VCMPEQQ_N_U VCMPNEQ_N_U @@ -718,8 +720,6 @@ VABDQ_F VADDQ_N_F VBICQ_F - VCADDQ_ROT270_F - VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F VCMPGEQ_F @@ -732,10 +732,6 @@ VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F - VCMULQ_F - VCMULQ_ROT180_F - VCMULQ_ROT270_F - VCMULQ_ROT90_F VEORQ_F VMAXNMAQ_F VMAXNMAVQ_F @@ -908,7 +904,6 @@ VMLSLDAVAQ_S VQSHRUNBQ_N_S VQRSHRUNTQ_N_S - VCMLAQ_F VMINNMAQ_M_F VFMASQ_N_F VDUPQ_M_N_F @@ -930,14 +925,12 @@ VADDLVAQ_P_S VQMOVUNBQ_M_S VCMPLEQ_M_F - VCMLAQ_ROT180_F VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F - VCMLAQ_ROT90_F VQMOVUNTQ_M_S VREV64Q_M_F VNEGQ_M_F @@ -950,7 +943,6 @@ VRMLALDAVHQ_P_S VRMLALDAVHXQ_P_S VCMPEQQ_M_N_F - VCMLAQ_ROT270_F VMAXNMAQ_M_F VRNDQ_M_F VMLALDAVQ_P_U |