diff options
Diffstat (limited to 'gcc/loop-doloop.c')
-rw-r--r-- | gcc/loop-doloop.c | 553 |
1 files changed, 553 insertions, 0 deletions
diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c new file mode 100644 index 00000000000..b75b9dc03a6 --- /dev/null +++ b/gcc/loop-doloop.c @@ -0,0 +1,553 @@ +/* Perform doloop optimizations + Copyright (C) 2004 Free Software Foundation, Inc. + Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "expr.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "toplev.h" +#include "tm_p.h" +#include "cfgloop.h" +#include "output.h" +#include "params.h" + +/* This module is used to modify loops with a determinable number of + iterations to use special low-overhead looping instructions. + + It first validates whether the loop is well behaved and has a + determinable number of iterations (either at compile or run-time). + It then modifies the loop to use a low-overhead looping pattern as + follows: + + 1. A pseudo register is allocated as the loop iteration counter. + + 2. The number of loop iterations is calculated and is stored + in the loop counter. + + 3. At the end of the loop, the jump insn is replaced by the + doloop_end pattern. The compare must remain because it might be + used elsewhere. If the loop-variable or condition register are + used elsewhere, they will be eliminated by flow. + + 4. An optional doloop_begin pattern is inserted at the top of the + loop. + + TODO The optimization should only performed when either the biv used for exit + condition is unused at all except for the exit test, or if we do not have to + change its value, since otherwise we have to add a new induction variable, + which usually will not pay up (unless the cost of the doloop pattern is + somehow extremely lower than the cost of compare & jump, or unless the bct + register cannot be used for anything else but doloop -- ??? detect these + cases). */ + +#ifdef HAVE_doloop_end + +/* Return the loop termination condition for PATTERN or zero + if it is not a decrement and branch jump insn. */ + +static rtx +doloop_condition_get (rtx pattern) +{ + rtx cmp; + rtx inc; + rtx reg; + rtx condition; + + /* The canonical doloop pattern we expect is: + + (parallel [(set (pc) (if_then_else (condition) + (label_ref (label)) + (pc))) + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + + Some machines (IA-64) make the decrement conditional on + the condition as well, so we don't bother verifying the + actual decrement. In summary, the branch must be the + first entry of the parallel (also required by jump.c), + and the second entry of the parallel must be a set of + the loop counter register. */ + + if (GET_CODE (pattern) != PARALLEL) + return 0; + + cmp = XVECEXP (pattern, 0, 0); + inc = XVECEXP (pattern, 0, 1); + + /* Check for (set (reg) (something)). */ + if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc))) + return 0; + + /* Extract loop counter register. */ + reg = SET_DEST (inc); + + /* Check for (set (pc) (if_then_else (condition) + (label_ref (label)) + (pc))). */ + if (GET_CODE (cmp) != SET + || SET_DEST (cmp) != pc_rtx + || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE + || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF + || XEXP (SET_SRC (cmp), 2) != pc_rtx) + return 0; + + /* Extract loop termination condition. */ + condition = XEXP (SET_SRC (cmp), 0); + + if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE) + || GET_CODE (XEXP (condition, 1)) != CONST_INT) + return 0; + + if (XEXP (condition, 0) == reg) + return condition; + + if (GET_CODE (XEXP (condition, 0)) == PLUS + && XEXP (XEXP (condition, 0), 0) == reg) + return condition; + + /* ??? If a machine uses a funny comparison, we could return a + canonicalised form here. */ + + return 0; +} + +/* Return nonzero if the loop specified by LOOP is suitable for + the use of special low-overhead looping instructions. DESC + describes the number of iterations of the loop. */ + +static bool +doloop_valid_p (struct loop *loop, struct niter_desc *desc) +{ + basic_block *body = get_loop_body (loop), bb; + rtx insn; + unsigned i; + + /* Check for loops that may not terminate under special conditions. */ + if (!desc->simple_p + || desc->assumptions + || desc->infinite) + { + /* There are some cases that would require a special attention. + For example if the comparison is LEU and the comparison value + is UINT_MAX then the loop will not terminate. Similarly, if the + comparison code is GEU and the comparison value is 0, the + loop will not terminate. + + If the absolute increment is not 1, the loop can be infinite + even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) + + ??? We could compute these conditions at run-time and have a + additional jump around the loop to ensure an infinite loop. + However, it is very unlikely that this is the intended + behavior of the loop and checking for these rare boundary + conditions would pessimize all other code. + + If the loop is executed only a few times an extra check to + restart the loop could use up most of the benefits of using a + count register loop. Note however, that normally, this + restart branch would never execute, so it could be predicted + well by the CPU. We should generate the pessimistic code by + default, and have an option, e.g. -funsafe-loops that would + enable count-register loops in this case. */ + if (dump_file) + fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); + return false; + } + + for (i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + /* A called function may clobber any special registers required for + low-overhead looping. */ + if (GET_CODE (insn) == CALL_INSN) + { + if (dump_file) + fprintf (dump_file, "Doloop: Function call in loop.\n"); + return false; + } + + /* Some targets (eg, PPC) use the count register for branch on table + instructions. ??? This should be a target specific check. */ + if (GET_CODE (insn) == JUMP_INSN + && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + || GET_CODE (PATTERN (insn)) == ADDR_VEC)) + { + if (dump_file) + fprintf (dump_file, "Doloop: Computed branch in the loop.\n"); + return false; + } + } + } + free (body); + + return true; +} + +/* Adds test of COND jumping to DEST to the end of BB. */ + +static void +add_test (rtx cond, basic_block bb, basic_block dest) +{ + rtx seq, jump, label; + enum machine_mode mode; + rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); + enum rtx_code code = GET_CODE (cond); + + mode = GET_MODE (XEXP (cond, 0)); + if (mode == VOIDmode) + mode = GET_MODE (XEXP (cond, 1)); + + start_sequence (); + op0 = force_operand (op0, NULL_RTX); + op1 = force_operand (op1, NULL_RTX); + label = block_label (dest); + do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label); + + jump = get_last_insn (); + JUMP_LABEL (jump) = label; + + /* The jump is supposed to handle an unlikely special case. */ + REG_NOTES (jump) + = gen_rtx_EXPR_LIST (REG_BR_PROB, + GEN_INT (0), REG_NOTES (jump)); + + LABEL_NUSES (label)++; + + seq = get_insns (); + end_sequence (); + emit_insn_after (seq, BB_END (bb)); +} + +/* Modify the loop to use the low-overhead looping insn where LOOP + describes the loop, DESC describes the number of iterations of the + loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the + end of the loop. CONDITION is the condition separated from the + DOLOOP_SEQ. */ + +static void +doloop_modify (struct loop *loop, struct niter_desc *desc, + rtx doloop_seq, rtx condition) +{ + rtx counter_reg; + rtx count, tmp, noloop = NULL_RTX; + rtx sequence; + rtx jump_insn; + rtx jump_label; + int nonneg = 0, irr; + bool increment_count; + basic_block loop_end = desc->out_edge->src; + + jump_insn = BB_END (loop_end); + + if (dump_file) + { + fprintf (dump_file, "Doloop: Inserting doloop pattern ("); + if (desc->const_iter) + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); + else + fputs ("runtime", dump_file); + fputs (" iterations).\n", dump_file); + } + + /* Discard original jump to continue loop. The original compare + result may still be live, so it cannot be discarded explicitly. */ + delete_insn (jump_insn); + + counter_reg = XEXP (condition, 0); + if (GET_CODE (counter_reg) == PLUS) + counter_reg = XEXP (counter_reg, 0); + + count = desc->niter_expr; + increment_count = false; + switch (GET_CODE (condition)) + { + case NE: + /* Currently only NE tests against zero and one are supported. */ + if (XEXP (condition, 1) == const1_rtx) + { + increment_count = true; + noloop = const1_rtx; + } + else if (XEXP (condition, 1) == const0_rtx) + noloop = const0_rtx; + else + abort (); + break; + + case GE: + /* Currently only GE tests against zero are supported. */ + if (XEXP (condition, 1) != const0_rtx) + abort (); + + noloop = constm1_rtx; + + /* The iteration count does not need incrementing for a GE test. */ + increment_count = false; + + /* Determine if the iteration counter will be non-negative. + Note that the maximum value loaded is iterations_max - 1. */ + if (desc->niter_max + <= ((unsigned HOST_WIDEST_INT) 1 + << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1))) + nonneg = 1; + break; + + /* Abort if an invalid doloop pattern has been generated. */ + default: + abort (); + } + + if (increment_count) + count = simplify_gen_binary (PLUS, desc->mode, count, const1_rtx); + + /* Insert initialization of the count register into the loop header. */ + start_sequence (); + tmp = force_operand (count, counter_reg); + convert_move (counter_reg, tmp, 1); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); + + if (desc->noloop_assumptions) + { + rtx ass = desc->noloop_assumptions; + basic_block preheader = loop_preheader_edge (loop)->src; + basic_block set_zero + = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); + basic_block new_preheader + = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); + basic_block bb; + edge te; + gcov_type cnt; + + /* Expand the condition testing the assumptions and if it does not pass, + reset the count register to 0. */ + add_test (XEXP (ass, 0), preheader, set_zero); + preheader->succ->flags &= ~EDGE_FALLTHRU; + cnt = preheader->succ->count; + preheader->succ->probability = 0; + preheader->succ->count = 0; + irr = preheader->succ->flags & EDGE_IRREDUCIBLE_LOOP; + te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr); + te->probability = REG_BR_PROB_BASE; + te->count = cnt; + set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); + + set_zero->count = 0; + set_zero->frequency = 0; + + for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1)) + { + bb = loop_split_edge_with (te, NULL_RTX); + te = bb->succ; + add_test (XEXP (ass, 0), bb, set_zero); + make_edge (bb, set_zero, irr); + } + + start_sequence (); + convert_move (counter_reg, noloop, 0); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (set_zero)); + } + + /* Some targets (eg, C4x) need to initialize special looping + registers. */ +#ifdef HAVE_doloop_begin + { + rtx init; + unsigned level = get_loop_level (loop) + 1; + init = gen_doloop_begin (counter_reg, + desc->const_iter ? desc->niter_expr : const0_rtx, + desc->niter_max, + GEN_INT (level)); + if (init) + { + start_sequence (); + emit_insn (init); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); + } + } +#endif + + /* Insert the new low-overhead looping insn. */ + emit_jump_insn_after (doloop_seq, BB_END (loop_end)); + jump_insn = BB_END (loop_end); + jump_label = block_label (desc->in_edge->dest); + JUMP_LABEL (jump_insn) = jump_label; + LABEL_NUSES (jump_label)++; + + /* Ensure the right fallthru edge is marked, for case we have reversed + the condition. */ + desc->in_edge->flags &= ~EDGE_FALLTHRU; + desc->out_edge->flags |= EDGE_FALLTHRU; + + /* Add a REG_NONNEG note if the actual or estimated maximum number + of iterations is non-negative. */ + if (nonneg) + { + REG_NOTES (jump_insn) + = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); + } +} + +/* Process loop described by LOOP validating that the loop is suitable for + conversion to use a low overhead looping instruction, replacing the jump + insn where suitable. Returns true if the loop was successfully + modified. */ + +static bool +doloop_optimize (struct loop *loop) +{ + enum machine_mode mode; + rtx doloop_seq, doloop_pat, doloop_reg; + rtx iterations; + rtx iterations_max; + rtx start_label; + rtx condition; + unsigned level, est_niter; + struct niter_desc *desc; + + if (dump_file) + fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); + + iv_analysis_loop_init (loop); + + /* Find the simple exit of a LOOP. */ + desc = get_simple_loop_desc (loop); + + /* Check that loop is a candidate for a low-overhead looping insn. */ + if (!doloop_valid_p (loop, desc)) + { + if (dump_file) + fprintf (dump_file, + "Doloop: The loop is not suitable.\n"); + return false; + } + mode = desc->mode; + + est_niter = 3; + if (desc->const_iter) + est_niter = desc->niter; + /* If the estimate on number of iterations is reliable (comes from profile + feedback), use it. Do not use it normally, since the expected number + of iterations of an unrolled loop is 2. */ + if (loop->header->count) + est_niter = expected_loop_iterations (loop); + + if (est_niter < 3) + { + if (dump_file) + fprintf (dump_file, + "Doloop: Too few iterations (%u) to be profitable.\n", + est_niter); + return false; + } + + iterations = desc->const_iter ? desc->niter_expr : const0_rtx; + iterations_max = GEN_INT (desc->niter_max); + level = get_loop_level (loop) + 1; + + /* Generate looping insn. If the pattern FAILs then give up trying + to modify the loop since there is some aspect the back-end does + not like. */ + start_label = block_label (desc->in_edge->dest); + doloop_reg = gen_reg_rtx (mode); + doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, + GEN_INT (level), start_label); + if (! doloop_seq && mode != word_mode) + { + PUT_MODE (doloop_reg, word_mode); + doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, + GEN_INT (level), start_label); + } + if (! doloop_seq) + { + if (dump_file) + fprintf (dump_file, + "Doloop: Target unwilling to use doloop pattern!\n"); + return false; + } + + /* If multiple instructions were created, the last must be the + jump instruction. Also, a raw define_insn may yield a plain + pattern. */ + doloop_pat = doloop_seq; + if (INSN_P (doloop_pat)) + { + while (NEXT_INSN (doloop_pat) != NULL_RTX) + doloop_pat = NEXT_INSN (doloop_pat); + if (GET_CODE (doloop_pat) == JUMP_INSN) + doloop_pat = PATTERN (doloop_pat); + else + doloop_pat = NULL_RTX; + } + + if (! doloop_pat + || ! (condition = doloop_condition_get (doloop_pat))) + { + if (dump_file) + fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); + return false; + } + + doloop_modify (loop, desc, doloop_seq, condition); + return true; +} + +/* This is the main entry point. Process all LOOPS using doloop_optimize. */ + +void +doloop_optimize_loops (struct loops *loops) +{ + unsigned i; + struct loop *loop; + + for (i = 1; i < loops->num; i++) + { + loop = loops->parray[i]; + if (!loop) + continue; + + doloop_optimize (loop); + } + + iv_analysis_done (); + +#ifdef ENABLE_CHECKING + verify_dominators (CDI_DOMINATORS); + verify_loop_structure (loops); +#endif +} +#endif /* HAVE_doloop_end */ + |