summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jgreenhalgh <jgreenhalgh@138bc75d-0d04-0410-961f-82ee72b054a4> 2014-11-11 17:37:35 +0000
committer jgreenhalgh <jgreenhalgh@138bc75d-0d04-0410-961f-82ee72b054a4> 2014-11-11 17:37:35 +0000
commit c4d0de0bf27d8e44a439a775d50d28b442b9c8b6 (patch)
tree 64feec8b8842992d131749045b465fa4bae5118a
parent 8d2c507e3644ab03a0be5e9d79a7af89219f9346 (diff)
download gcc-c4d0de0bf27d8e44a439a775d50d28b442b9c8b6.tar.gz
[Patch AArch64] Fix up BSL expander for floating point types
gcc/

	* config/aarch64/aarch64-simd.md
	(aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
	(aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
	are punning between float vectors and integer vectors.

gcc/testsuite/

	* gcc.target/aarch64/vbslq_f64_1.c: New.
	* gcc.target/aarch64/vbslq_f64_2.c: Likewise.
	* gcc.target/aarch64/vbslq_u64_1.c: Likewise.
	* gcc.target/aarch64/vbslq_u64_2.c: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217362 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 7
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md 32
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c 21
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c 24
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c 17
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c 22
7 files changed, 120 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3e2dec32233..302958455a1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2014-11-11 James Greenhalgh <james.greenhalgh@arm.com>
+
+ * config/aarch64/aarch64-simd.md
+ (aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
+ (aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
+ are punning between float vectors and integer vectors.
+
2014-11-11 Uros Bizjak <ubizjak@gmail.com>
* config/alpha/alpha.c (alpha_emit_conditional_branch): Replace
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef196e4b6fb..f7012ecab07 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1924,15 +1924,15 @@
;; bif op0, op1, mask
(define_insn "aarch64_simd_bsl<mode>_internal"
- [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w")
- (ior:VALLDIF
- (and:VALLDIF
- (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
- (match_operand:VALLDIF 2 "register_operand" " w,w,0"))
- (and:VALLDIF
+ [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
+ (ior:VSDQ_I_DI
+ (and:VSDQ_I_DI
(not:<V_cmp_result>
- (match_dup:<V_cmp_result> 1))
- (match_operand:VALLDIF 3 "register_operand" " w,0,w"))
+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w"))
+ (match_operand:VSDQ_I_DI 3 "register_operand" " w,0,w"))
+ (and:VSDQ_I_DI
+ (match_dup:<V_cmp_result> 1)
+ (match_operand:VSDQ_I_DI 2 "register_operand" " w,w,0"))
))]
"TARGET_SIMD"
"@
@@ -1950,9 +1950,21 @@
"TARGET_SIMD"
{
/* We can't alias operands together if they have different modes. */
+ rtx tmp = operands[0];
+ if (FLOAT_MODE_P (<MODE>mode))
+ {
+ operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
+ operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
+ tmp = gen_reg_rtx (<V_cmp_result>mode);
+ }
operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
- emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
- operands[2], operands[3]));
+ emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
+ operands[1],
+ operands[2],
+ operands[3]));
+ if (tmp != operands[0])
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+
DONE;
})
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b301e055908..ae73a1013d1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2014-11-11 James Greenhalgh <james.greenhalgh@arm.com>
+
+ * gcc.target/aarch64/vbslq_f64_1.c: New.
+ * gcc.target/aarch64/vbslq_f64_2.c: Likewise.
+ * gcc.target/aarch64/vbslq_u64_1.c: Likewise.
+ * gcc.target/aarch64/vbslq_u64_2.c: Likewise.
+
2014-11-11 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/63265
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
new file mode 100644
index 00000000000..128a1db2a66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
@@ -0,0 +1,21 @@
+/* Test vbslq_f32 can be folded. */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to ret. */
+
+float32x4_t
+fold_me (float32x4_t a, float32x4_t b)
+{
+ uint32x4_t mask = {-1, -1, -1, -1};
+ return vbslq_f32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
new file mode 100644
index 00000000000..62358bf5932
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
@@ -0,0 +1,24 @@
+/* Test vbslq_f32 can be folded. */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Should fold out one half of the BSL, leaving just a BIC. */
+
+float32x4_t
+half_fold_me (uint32x4_t mask)
+{
+ float32x4_t a = {0.0, 0.0, 0.0, 0.0};
+ float32x4_t b = {2.0, 4.0, 8.0, 16.0};
+ return vbslq_f32 (mask, a, b);
+
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
new file mode 100644
index 00000000000..7a4892e9577
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
@@ -0,0 +1,17 @@
+/* Test if a BSL-like instruction can be generated from a C idiom. */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to BIF. */
+
+uint32x4_t
+vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask)
+{
+ return (mask & a) | (~mask & b);
+}
+
+/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
new file mode 100644
index 00000000000..5b70168e391
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
@@ -0,0 +1,22 @@
+/* Test vbslq_s32 can be folded. */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+#include <arm_neon.h>
+
+/* Folds to BIC. */
+
+int32x4_t
+half_fold_int (uint32x4_t mask)
+{
+ int32x4_t a = {0, 0, 0, 0};
+ int32x4_t b = {2, 4, 8, 16};
+ return vbslq_s32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+