diff options
author | Andre Vieira <andre.simoesdiasvieira@arm.com> | 2021-02-09 18:09:19 +0000 |
---|---|---|
committer | Andre Vieira <andre.simoesdiasvieira@arm.com> | 2021-02-09 18:24:22 +0000 |
commit | a11ef53238c8ebaab9a3fbf200cb8b5c997b473b (patch) | |
tree | e393fec2a59117a3b3c43f37c9b50275ab865b1d | |
parent | 73e52926b6c5f02fb3f34706b4afb2d8bcd86638 (diff) | |
download | gcc-a11ef53238c8ebaab9a3fbf200cb8b5c997b473b.tar.gz |
aarch64: fix vector multiplication costs
This patch introduces a vect.mul RTX cost and decouples the vector
multiplication costing from the scalar one.
gcc/ChangeLog:
2021-02-09 Andre Vieira <andre.simoesdiasvieira@arm.com>
* config/aarch64/aarch64-cost-tables.h: Add entries for vect.mul.
* config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Use vect.mul for
vector multiplies and vect.alu for SSRA.
* config/arm/aarch-common-protos.h (struct vector_cost_table): Define
vect.mul cost field.
* config/arm/aarch-cost-tables.h: Add entries for vect.mul.
* config/arm/arm.c: Likewise.
gcc/testsuite/ChangeLog:
2021-02-09 Andre Vieira <andre.simoesdiasvieira@arm.com>
* gcc.target/aarch64/asimd-mul-to-shl-sub.c: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64-cost-tables.h | 18 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 12 | ||||
-rw-r--r-- | gcc/config/arm/aarch-common-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/arm/aarch-cost-tables.h | 18 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c | 17 |
6 files changed, 67 insertions, 20 deletions
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index c309f88cbd5..dd2e7e7cbb1 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -123,7 +123,8 @@ const struct cpu_cost_table qdf24xx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -227,7 +228,8 @@ const struct cpu_cost_table thunderx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -330,7 +332,8 @@ const struct cpu_cost_table thunderx2t99_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (4) /* Mult. */ } }; @@ -433,7 +436,8 @@ const struct cpu_cost_table thunderx3t110_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (4) /* Mult. */ } }; @@ -537,7 +541,8 @@ const struct cpu_cost_table tsv110_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -640,7 +645,8 @@ const struct cpu_cost_table a64fx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b6192e55521..146ed8c1b69 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -11568,7 +11568,6 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) if (VECTOR_MODE_P (mode)) { unsigned int vec_flags = aarch64_classify_vector_mode (mode); - mode = GET_MODE_INNER (mode); if (vec_flags & VEC_ADVSIMD) { /* The by-element versions of the instruction have the same costs as @@ -11582,6 +11581,17 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) else if (GET_CODE (op1) == VEC_DUPLICATE) op1 = XEXP (op1, 0); } + cost += rtx_cost (op0, mode, MULT, 0, speed); + cost += rtx_cost (op1, mode, MULT, 1, speed); + if (speed) + { + if (GET_CODE (x) == MULT) + cost += extra_cost->vect.mult; + /* This is to catch the SSRA costing currently flowing here. */ + else + cost += extra_cost->vect.alu; + } + return cost; } /* Integer multiply/fma. */ diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 251de3d61a8..7a9cf3d324c 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -132,6 +132,7 @@ struct fp_cost_table struct vector_cost_table { const int alu; + const int mult; }; struct cpu_cost_table diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index d4baee4f83a..25ff702f01f 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -121,7 +121,8 @@ const struct cpu_cost_table generic_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -224,7 +225,8 @@ const struct cpu_cost_table cortexa53_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -327,7 +329,8 @@ const struct cpu_cost_table cortexa57_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -430,7 +433,8 @@ const struct cpu_cost_table cortexa76_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -533,7 +537,8 @@ const struct cpu_cost_table exynosm1_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (0) /* alu. */ + COSTS_N_INSNS (0), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -636,7 +641,8 @@ const struct cpu_cost_table xgene1_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (2) /* alu. */ + COSTS_N_INSNS (2), /* alu. */ + COSTS_N_INSNS (8) /* mult. */ } }; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index e22396dbcd5..d254f417541 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1192,7 +1192,8 @@ const struct cpu_cost_table cortexa9_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -1295,7 +1296,8 @@ const struct cpu_cost_table cortexa8_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -1399,7 +1401,8 @@ const struct cpu_cost_table cortexa5_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -1504,7 +1507,8 @@ const struct cpu_cost_table cortexa7_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -1607,7 +1611,8 @@ const struct cpu_cost_table cortexa12_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -1710,7 +1715,8 @@ const struct cpu_cost_table cortexa15_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -1813,7 +1819,8 @@ const struct cpu_cost_table v7m_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; diff --git a/gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c b/gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c new file mode 100644 index 00000000000..d7c5e5f341b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-Ofast" } */ + +/* +**foo: +** shl v1.4s, v0.4s, 16 +** sub v0.4s, v1.4s, v0.4s +** ret +*/ +#include <arm_neon.h> +uint32x4_t foo (uint32x4_t a) +{ + return a * 65535; +} + +/* { dg-final { check-function-bodies "**" "" "" } } */ |