summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: wilco <wilco@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-01-28 11:52:08 +0000
committer: wilco <wilco@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-01-28 11:52:08 +0000
commit: b2b28d24f9e8e257d1211a9b7717d9f32dc71544 (patch)
tree: 01eefad07cd25439ef89e429e6aa15805e974d2b
parent: 9c6705c6e61ead4f7a97a727216a30218cb0241f (diff)
download: gcc-b2b28d24f9e8e257d1211a9b7717d9f32dc71544.tar.gz
Add support for vector permute cost since various permutes can expand
into a complex sequence of instructions. This fixes major performance
regressions due to recent changes in the SLP vectorizer (which now vectorizes
more aggressively and emits many complex permutes). Set the cost to > 1 for
all microarchitectures so that the number of permutes is usually zero and
the regressions disappear.

2016-01-28 Wilco Dijkstra <wdijkstr@arm.com>

* config/aarch64/aarch64.c (generic_vector_cost): Set vec_permute_cost.
  (cortexa57_vector_cost): Likewise.
  (exynosm1_vector_cost): Likewise.
  (xgene1_vector_cost): Likewise.
  (aarch64_builtin_vectorization_cost): Use vec_permute_cost.
* config/aarch64/aarch64-protos.h (cpu_vector_cost): Add vec_permute_cost
  entry.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@232922 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h 3
-rw-r--r-- gcc/config/aarch64/aarch64.c 6
3 files changed, 19 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 99f2bdb32d7..1967e922798 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
2016-01-28 Wilco Dijkstra <wdijkstr@arm.com>
+ * config/aarch64/aarch64.c (generic_vector_cost):
+ Set vec_permute_cost.
+ (cortexa57_vector_cost): Likewise.
+ (exynosm1_vector_cost): Likewise.
+ (xgene1_vector_cost): Likewise.
+ (aarch64_builtin_vectorization_cost): Use vec_permute_cost.
+ * config/aarch64/aarch64-protos.h (cpu_vector_cost):
+ Add vec_permute_cost entry.
+
+2016-01-28 Wilco Dijkstra <wdijkstr@arm.com>
+
* config/aarch64/aarch64.md (ccmp<mode>): Disassemble
immediate as %1.
(add<mode>3_compare0): Likewise.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 15fc37deb9a..bd900c6269f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -156,9 +156,10 @@ struct cpu_vector_cost
const int scalar_load_cost; /* Cost of scalar load. */
const int scalar_store_cost; /* Cost of scalar store. */
const int vec_stmt_cost; /* Cost of any vector operation,
- excluding load, store,
+ excluding load, store, permute,
vector-to-scalar and
scalar-to-vector operation. */
+ const int vec_permute_cost; /* Cost of permute operation. */
const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */
const int scalar_to_vec_cost; /* Cost of scalar-to-vector
operation. */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index df3dec0a72b..5b3771eca67 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -315,6 +315,7 @@ static const struct cpu_vector_cost generic_vector_cost =
1, /* scalar_load_cost */
1, /* scalar_store_cost */
1, /* vec_stmt_cost */
+ 2, /* vec_permute_cost */
1, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* vec_align_load_cost */
@@ -332,6 +333,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
4, /* scalar_load_cost */
1, /* scalar_store_cost */
3, /* vec_stmt_cost */
+ 3, /* vec_permute_cost */
8, /* vec_to_scalar_cost */
8, /* scalar_to_vec_cost */
5, /* vec_align_load_cost */
@@ -348,6 +350,7 @@ static const struct cpu_vector_cost exynosm1_vector_cost =
5, /* scalar_load_cost */
1, /* scalar_store_cost */
3, /* vec_stmt_cost */
+ 3, /* vec_permute_cost */
3, /* vec_to_scalar_cost */
3, /* scalar_to_vec_cost */
5, /* vec_align_load_cost */
@@ -365,6 +368,7 @@ static const struct cpu_vector_cost xgene1_vector_cost =
5, /* scalar_load_cost */
1, /* scalar_store_cost */
2, /* vec_stmt_cost */
+ 2, /* vec_permute_cost */
4, /* vec_to_scalar_cost */
4, /* scalar_to_vec_cost */
10, /* vec_align_load_cost */
@@ -7574,6 +7578,8 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
case vec_perm:
+ return aarch64_tune_params.vec_costs->vec_permute_cost;
+
case vec_promote_demote:
return aarch64_tune_params.vec_costs->vec_stmt_cost;