diff options
author | gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-04-05 17:26:12 +0000 |
---|---|---|
committer | gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-04-05 17:26:12 +0000 |
commit | 353cf59ab0f2faebfb9b9164b2dfe6784d126c37 (patch) | |
tree | 372372b89c0e359b781b42285cc48fa5324b4f47 | |
parent | cb860649ffb738ff96ddaf038e13cd13a99cd21d (diff) | |
download | gcc-353cf59ab0f2faebfb9b9164b2dfe6784d126c37.tar.gz |
2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com>
gcc/testsuite
* gcc.target/arm/peep-ldrd-1.c: New test.
* gcc.target/arm/peep-strd-1.c: Likewise.
gcc/
* config/arm/constraints.md (q): New constraint.
* config/arm/ldrdstrd.md: New file.
* config/arm/arm.md (ldrdstrd.md) New include.
(arm_movdi): Use "q" instead of "r" constraint
for double-word memory access.
(movdf_soft_insn): Likewise.
* config/arm/vfp.md (movdi_vfp): Likewise.
* config/arm/t-arm (MD_INCLUDES): Add ldrdstrd.md.
* config/arm/arm-protos.h (gen_operands_ldrd_strd): New declaration.
* config/arm/arm.c (gen_operands_ldrd_strd): New function.
(mem_ok_for_ldrd_strd): Likewise.
(output_move_double): Update assertion.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@197530 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 274 | ||||
-rw-r--r-- | gcc/config/arm/arm.md | 11 | ||||
-rw-r--r-- | gcc/config/arm/constraints.md | 5 | ||||
-rw-r--r-- | gcc/config/arm/ldrdstrd.md | 260 | ||||
-rw-r--r-- | gcc/config/arm/t-arm | 1 | ||||
-rw-r--r-- | gcc/config/arm/vfp.md | 4 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/peep-ldrd-1.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/peep-strd-1.c | 9 |
11 files changed, 588 insertions, 8 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9e0e5c9542f..1412a6ac87a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,20 @@ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com> + * config/arm/constraints.md (q): New constraint. + * config/arm/ldrdstrd.md: New file. + * config/arm/arm.md (ldrdstrd.md) New include. + (arm_movdi): Use "q" instead of "r" constraint + for double-word memory access. + (movdf_soft_insn): Likewise. + * config/arm/vfp.md (movdi_vfp): Likewise. + * config/arm/t-arm (MD_INCLUDES): Add ldrdstrd.md. + * config/arm/arm-protos.h (gen_operands_ldrd_strd): New declaration. + * config/arm/arm.c (gen_operands_ldrd_strd): New function. + (mem_ok_for_ldrd_strd): Likewise. + (output_move_double): Update assertion. + +2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com> + * config/arm/arm.md: Comment on splitting Thumb1 patterns. 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 4274c0dbd7b..a6af9275712 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -118,6 +118,7 @@ extern rtx arm_gen_load_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); +extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); extern int arm_gen_movmemqi (rtx *); extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx, diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1558fb0eef9..af95ac1d407 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -12636,6 +12636,277 @@ operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset, return true; } +/* Helper for gen_operands_ldrd_strd. 
Returns true iff the memory + operand ADDR is an immediate offset from the base register and is + not volatile, in which case it sets BASE and OFFSET + accordingly. */ +bool +mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset) +{ + /* TODO: Handle more general memory operand patterns, such as + PRE_DEC and PRE_INC. */ + + /* Convert a subreg of mem into mem itself. */ + if (GET_CODE (addr) == SUBREG) + addr = alter_subreg (&addr, true); + + gcc_assert (MEM_P (addr)); + + /* Don't modify volatile memory accesses. */ + if (MEM_VOLATILE_P (addr)) + return false; + + *offset = const0_rtx; + + addr = XEXP (addr, 0); + if (REG_P (addr)) + { + *base = addr; + return true; + } + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); + return (REG_P (*base) && CONST_INT_P (*offset)); + } + + return false; +} + +#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0) + +/* Called from a peephole2 to replace two word-size accesses with a + single LDRD/STRD instruction. Returns true iff we can generate a + new instruction sequence. That is, both accesses use the same base + register and the gap between constant offsets is 4. This function + may reorder its operands to match ldrd/strd RTL templates. + OPERANDS are the operands found by the peephole matcher; + OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the + corresponding memory operands. LOAD indicaates whether the access + is load or store. CONST_STORE indicates a store of constant + integer values held in OPERANDS[4,5] and assumes that the pattern + is of length 4 insn, for the purpose of checking dead registers. + COMMUTE indicates that register operands may be reordered. 
*/ +bool +gen_operands_ldrd_strd (rtx *operands, bool load, + bool const_store, bool commute) +{ + int nops = 2; + HOST_WIDE_INT offsets[2], offset; + rtx base; + rtx cur_base, cur_offset, tmp; + int i, gap; + HARD_REG_SET regset; + + gcc_assert (!const_store || !load); + /* Check that the memory references are immediate offsets from the + same base register. Extract the base register, the destination + registers, and the corresponding memory offsets. */ + for (i = 0; i < nops; i++) + { + if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset)) + return false; + + if (i == 0) + base = cur_base; + else if (REGNO (base) != REGNO (cur_base)) + return false; + + offsets[i] = INTVAL (cur_offset); + if (GET_CODE (operands[i]) == SUBREG) + { + tmp = SUBREG_REG (operands[i]); + gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp)); + operands[i] = tmp; + } + } + + /* Make sure there is no dependency between the individual loads. */ + if (load && REGNO (operands[0]) == REGNO (base)) + return false; /* RAW */ + + if (load && REGNO (operands[0]) == REGNO (operands[1])) + return false; /* WAW */ + + /* If the same input register is used in both stores + when storing different constants, try to find a free register. + For example, the code + mov r0, 0 + str r0, [r2] + mov r0, 1 + str r0, [r2, #4] + can be transformed into + mov r1, 0 + strd r1, r0, [r2] + in Thumb mode assuming that r1 is free. */ + if (const_store + && REGNO (operands[0]) == REGNO (operands[1]) + && INTVAL (operands[4]) != INTVAL (operands[5])) + { + if (TARGET_THUMB2) + { + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", SImode, &regset); + if (tmp == NULL_RTX) + return false; + + /* Use the new register in the first load to ensure that + if the original input register is not dead after peephole, + then it will have the correct constant value. 
*/ + operands[0] = tmp; + } + else if (TARGET_ARM) + { + return false; + int regno = REGNO (operands[0]); + if (!peep2_reg_dead_p (4, operands[0])) + { + /* When the input register is even and is not dead after the + pattern, it has to hold the second constant but we cannot + form a legal STRD in ARM mode with this register as the second + register. */ + if (regno % 2 == 0) + return false; + + /* Is regno-1 free? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, &regset); + if (tmp == NULL_RTX) + return false; + + operands[0] = tmp; + } + else + { + /* Find a DImode register. */ + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", DImode, &regset); + if (tmp != NULL_RTX) + { + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + } + else + { + /* Can we use the input register to form a DI register? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, + regno % 2 == 0 ? regno + 1 : regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, &regset); + if (tmp == NULL_RTX) + return false; + operands[regno % 2 == 1 ? 0 : 1] = tmp; + } + } + + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1); + } + } + + /* Make sure the instructions are ordered with lower memory access first. */ + if (offsets[0] > offsets[1]) + { + gap = offsets[0] - offsets[1]; + offset = offsets[1]; + + /* Swap the instructions such that lower memory is accessed first. */ + SWAP_RTX (operands[0], operands[1]); + SWAP_RTX (operands[2], operands[3]); + if (const_store) + SWAP_RTX (operands[4], operands[5]); + } + else + { + gap = offsets[1] - offsets[0]; + offset = offsets[0]; + } + + /* Make sure accesses are to consecutive memory locations. 
*/ + if (gap != 4) + return false; + + /* Make sure we generate legal instructions. */ + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + + /* In Thumb state, where registers are almost unconstrained, there + is little hope to fix it. */ + if (TARGET_THUMB2) + return false; + + if (load && commute) + { + /* Try reordering registers. */ + SWAP_RTX (operands[0], operands[1]); + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + } + + if (const_store) + { + /* If input registers are dead after this pattern, they can be + reordered or replaced by other registers that are free in the + current pattern. */ + if (!peep2_reg_dead_p (4, operands[0]) + || !peep2_reg_dead_p (4, operands[1])) + return false; + + /* Try to reorder the input registers. */ + /* For example, the code + mov r0, 0 + mov r1, 1 + str r1, [r2] + str r0, [r2, #4] + can be transformed into + mov r1, 0 + mov r0, 1 + strd r0, [r2] + */ + if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset, + false, false)) + { + SWAP_RTX (operands[0], operands[1]); + return true; + } + + /* Try to find a free DI register. */ + CLEAR_HARD_REG_SET (regset); + add_to_hard_reg_set (&regset, SImode, REGNO (operands[0])); + add_to_hard_reg_set (&regset, SImode, REGNO (operands[1])); + while (true) + { + tmp = peep2_find_free_register (0, 4, "r", DImode, &regset); + if (tmp == NULL_RTX) + return false; + + /* DREG must be an even-numbered register in DImode. + Split it into SI registers. 
*/ + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1])); + + return (operands_ok_ldrd_strd (operands[0], operands[1], + base, offset, + false, load)); + } + } + + return false; +} +#undef SWAP_RTX + + + /* Print a symbolic form of X to the debug file, F. */ static void @@ -14825,7 +15096,8 @@ output_move_double (rtx *operands, bool emit, int *count) { /* Constraints should ensure this. */ gcc_assert (code0 == MEM && code1 == REG); - gcc_assert (REGNO (operands[1]) != IP_REGNUM); + gcc_assert ((REGNO (operands[1]) != IP_REGNUM) + || (TARGET_ARM && TARGET_LDRD)); switch (GET_CODE (XEXP (operands[0], 0))) { diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index a1789a27c44..b631e9e7341 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5729,8 +5729,8 @@ ) (define_insn "*arm_movdi" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m") - (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))] + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))] "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_VFP) && !TARGET_IWMMXT @@ -7154,8 +7154,8 @@ ) (define_insn "*movdf_soft_insn" - [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m") - (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))] + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m") + (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))] "TARGET_32BIT && TARGET_SOFT_FLOAT && ( register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode))" @@ -12116,6 +12116,9 @@ (set_attr "predicable" "yes")]) +;; Load the load/store double peephole optimizations. 
+(include "ldrdstrd.md") + ;; Load the load/store multiple patterns (include "ldmstm.md") diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 775f8afd7f3..8974f4ee2ce 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -21,7 +21,7 @@ ;; The following register constraints have been used: ;; - in ARM/Thumb-2 state: t, w, x, y, z ;; - in Thumb state: h, b -;; - in both states: l, c, k +;; - in both states: l, c, k, q ;; In ARM state, 'l' is an alias for 'r' ;; 'f' and 'v' were previously used for FPA and MAVERICK registers. @@ -86,6 +86,9 @@ (define_register_constraint "k" "STACK_REG" "@internal The stack register.") +(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS" + "@internal In ARM state with LDRD support, core registers, otherwise general registers.") + (define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS" "@internal Thumb only. The union of the low registers and the stack register.") diff --git a/gcc/config/arm/ldrdstrd.md b/gcc/config/arm/ldrdstrd.md new file mode 100644 index 00000000000..58c883ef1cb --- /dev/null +++ b/gcc/config/arm/ldrdstrd.md @@ -0,0 +1,260 @@ +;; ARM ldrd/strd peephole optimizations. +;; +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; Written by Greta Yorsh <greta.yorsh@arm.com> + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. 
If not see +;; <http://www.gnu.org/licenses/>. + +;; The following peephole optimizations identify consecutive memory +;; accesses, and try to rearrange the operands to enable generation of +;; ldrd/strd. + +(define_peephole2 ; ldrd + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, true, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. */ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 0) (match_dup 2))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "arm_general_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "arm_general_register_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. 
*/ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 2) (match_dup 0))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following peepholes reorder registers to enable LDRD/STRD. +(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET 
(VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following two peephole optimizations are only relevant for ARM +;; mode where LDRD/STRD require consecutive registers. 
+ +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation. + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ]))] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation that sets the flags. 
+ [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ])) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (parallel + [(set (match_dup 4) + (match_op_dup 5 [(match_dup 6) (match_dup 7)])) + (clobber (reg:CC CC_REGNUM))])] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +;; TODO: Handle LDRD/STRD with writeback: +;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY +;; (b) Patterns may be followed by an update of the base address. 
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 40ff4501a8c..fe075e5862a 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -53,6 +53,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ $(srcdir)/config/arm/iwmmxt.md \ $(srcdir)/config/arm/iwmmxt2.md \ $(srcdir)/config/arm/ldmstm.md \ + $(srcdir)/config/arm/ldrdstrd.md \ $(srcdir)/config/arm/marvell-f-iwmmxt.md \ $(srcdir)/config/arm/neon.md \ $(srcdir)/config/arm/predicates.md \ diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 923624ffc6f..1930cddb835 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -132,8 +132,8 @@ ;; DImode moves (define_insn "*movdi_vfp" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,r,w,w, Uv") - (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))] + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv") + (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8 && ( register_operand (operands[0], DImode) || register_operand (operands[1], DImode)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b60003fbaf5..68f778673a9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,10 @@ 2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com> + * gcc.target/arm/peep-ldrd-1.c: New test. + * gcc.target/arm/peep-strd-1.c: Likewise. + +2013-04-05 Greta Yorsh <Greta.Yorsh@arm.com> + * gcc.target/arm/negdi-1.c: New test. * gcc.target/arm/negdi-2.c: Likewise. * gcc.target/arm/negdi-3.c: Likewise. 
diff --git a/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c b/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c new file mode 100644 index 00000000000..eb2b86ee7b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_prefer_ldrd_strd } */ +/* { dg-options "-O2" } */ +int foo(int a, int b, int* p, int *q) +{ + a = p[2] + p[3]; + *q = a; + *p = a; + return a; +} +/* { dg-final { scan-assembler "ldrd" } } */ diff --git a/gcc/testsuite/gcc.target/arm/peep-strd-1.c b/gcc/testsuite/gcc.target/arm/peep-strd-1.c new file mode 100644 index 00000000000..bd330769599 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/peep-strd-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_prefer_ldrd_strd } */ +/* { dg-options "-O2" } */ +void foo(int a, int b, int* p) +{ + p[2] = a; + p[3] = b; +} +/* { dg-final { scan-assembler "strd" } } */ |