diff options
-rw-r--r-- | gcc/ChangeLog | 24 | ||||
-rw-r--r-- | gcc/Makefile.in | 8 | ||||
-rw-r--r-- | gcc/config/bfin/bfin.c | 733 | ||||
-rw-r--r-- | gcc/config/bfin/bfin.md | 2 | ||||
-rw-r--r-- | gcc/hw-doloop.c | 672 | ||||
-rw-r--r-- | gcc/hw-doloop.h | 157 |
6 files changed, 928 insertions, 668 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 35d8859c58a..f840e57803d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2011-07-07 Bernd Schmidt <bernds@codesourcery.com> + + * hw-doloop.c: New file. + * hw-doloop.h: New file. + * Makefile.in (OBJS): Add hw-doloop.o. + (hw-doloop.o): New rule. + ($(obj_out_file)): Add hw-doloop.h dependency. + * config/bfin/bfin.c: Include "hw-doloop.h". + (loop_info, DEF_VEC_P for loop_info, loop_info_d): Remove. + (bfin_dump_loops, bfin_bb_in_loop, bfin_scan_loop): Remove. + (hwloop_optimize): Renamed from bfin_optimize_loop. Argument + type changed to hwloop_info. Return bool, true if the loop was + successfully optimized. Remove code that was moved to + hw-doloop.c, and adjust other parts. + (hwloop_fail): New static function, containing parts that used + to be in bfin_optimize_loop. + (bfin_discover_loop, bfin_discover_loops, free_loops, + bfin_reorder_loops): Remove. + (hwloop_pattern_reg): New static function. + (bfin_doloop_hooks): New variable. + (bfin_reorg_loops): Remove most code, call reorg_loops. + * config/bfin/bfin.md (doloop_end splitter): Also enable if + loop counter is a memory_operand. + 2011-07-07 H.J. Lu <hongjiu.lu@intel.com> * config.gcc: Support --with-multilib-list for x86 Linux diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 82119117e67..d09423dac00 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1300,6 +1300,7 @@ OBJS = \ graphite-sese-to-poly.o \ gtype-desc.o \ haifa-sched.o \ + hw-doloop.o \ hwint.o \ ifcvt.o \ implicit-zee.o \ @@ -3552,13 +3553,16 @@ target-globals.o : target-globals.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) insn-config.h $(MACHMODE_H) $(GGC_H) toplev.h target-globals.h \ $(FLAGS_H) $(REGS_H) $(RTL_H) reload.h expmed.h $(EXPR_H) $(OPTABS_H) \ $(LIBFUNCS_H) $(CFGLOOP_H) $(IRA_INT_H) builtins.h gcse.h bb-reorder.h - +hw-doloop.o : hw-doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(FLAGS_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) \ + $(DF_H) $(CFGLAYOUT_H) $(CFGLOOP_H) output.h $(RECOG_H) $(TARGET_H) \ + $(REGS_H) hw-doloop.h $(out_object_file): $(out_file) $(CONFIG_H) coretypes.h $(TM_H) $(TREE_H) \ $(RTL_H) $(REGS_H) hard-reg-set.h insn-config.h conditions.h \ output.h $(INSN_ATTR_H) $(SYSTEM_H) toplev.h $(DIAGNOSTIC_CORE_H) \ $(TARGET_H) $(LIBFUNCS_H) $(TARGET_DEF_H) $(FUNCTION_H) $(SCHED_INT_H) \ $(TM_P_H) $(EXPR_H) langhooks.h $(GGC_H) $(OPTABS_H) $(REAL_H) \ - tm-constrs.h $(GIMPLE_H) $(DF_H) cselib.h $(COMMON_TARGET_H) + tm-constrs.h $(GIMPLE_H) $(DF_H) cselib.h $(COMMON_TARGET_H) hw-doloop.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ $(out_file) $(OUTPUT_OPTION) diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c index 22fdfb79b15..f5b45dc0efb 100644 --- a/gcc/config/bfin/bfin.c +++ b/gcc/config/bfin/bfin.c @@ -56,6 +56,7 @@ #include "timevar.h" #include "df.h" #include "sel-sched.h" +#include "hw-doloop.h" #include "opts.h" /* A C structure for machine-specific, per-function data. @@ -3381,157 +3382,6 @@ bfin_hardware_loop (void) /* Maximum distance of the LSETUP instruction from the loop start. */ #define MAX_LSETUP_DISTANCE 30 -/* We need to keep a vector of loops */ -typedef struct loop_info_d *loop_info; -DEF_VEC_P (loop_info); -DEF_VEC_ALLOC_P (loop_info,heap); - -/* Information about a loop we have found (or are in the process of - finding). */ -struct GTY (()) loop_info_d -{ - /* loop number, for dumps */ - int loop_no; - - /* All edges that jump into and out of the loop. */ - VEC(edge,gc) *incoming; - - /* We can handle two cases: all incoming edges have the same destination - block, or all incoming edges have the same source block. These two - members are set to the common source or destination we found, or NULL - if different blocks were found. If both are NULL the loop can't be - optimized. */ - basic_block incoming_src; - basic_block incoming_dest; - - /* First block in the loop. This is the one branched to by the loop_end - insn. */ - basic_block head; - - /* Last block in the loop (the one with the loop_end insn). */ - basic_block tail; - - /* The successor block of the loop. This is the one the loop_end insn - falls into. */ - basic_block successor; - - /* The last instruction in the tail. */ - rtx last_insn; - - /* The loop_end insn. */ - rtx loop_end; - - /* The iteration register. */ - rtx iter_reg; - - /* The new label placed at the beginning of the loop. */ - rtx start_label; - - /* The new label placed at the end of the loop. */ - rtx end_label; - - /* The length of the loop. */ - int length; - - /* The nesting depth of the loop. */ - int depth; - - /* Nonzero if we can't optimize this loop. */ - int bad; - - /* True if we have visited this loop. */ - int visited; - - /* True if this loop body clobbers any of LC0, LT0, or LB0. */ - int clobber_loop0; - - /* True if this loop body clobbers any of LC1, LT1, or LB1. */ - int clobber_loop1; - - /* Next loop in the graph. */ - struct loop_info_d *next; - - /* Immediate outer loop of this loop. */ - struct loop_info_d *outer; - - /* Vector of blocks only within the loop, including those within - inner loops. */ - VEC (basic_block,heap) *blocks; - - /* Same information in a bitmap. */ - bitmap block_bitmap; - - /* Vector of inner loops within this loop */ - VEC (loop_info,heap) *loops; -}; - -static void -bfin_dump_loops (loop_info loops) -{ - loop_info loop; - - for (loop = loops; loop; loop = loop->next) - { - loop_info i; - basic_block b; - unsigned ix; - - fprintf (dump_file, ";; loop %d: ", loop->loop_no); - if (loop->bad) - fprintf (dump_file, "(bad) "); - fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth); - - fprintf (dump_file, " blocks: [ "); - FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, b) - fprintf (dump_file, "%d ", b->index); - fprintf (dump_file, "] "); - - fprintf (dump_file, " inner loops: [ "); - FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, i) - fprintf (dump_file, "%d ", i->loop_no); - fprintf (dump_file, "]\n"); - } - fprintf (dump_file, "\n"); -} - -/* Scan the blocks of LOOP (and its inferiors) looking for basic block - BB. Return true, if we find it. */ - -static bool -bfin_bb_in_loop (loop_info loop, basic_block bb) -{ - return bitmap_bit_p (loop->block_bitmap, bb->index); -} - -/* Scan the blocks of LOOP (and its inferiors) looking for uses of - REG. Return true, if we find any. Don't count the loop's loop_end - insn if it matches LOOP_END. */ - -static bool -bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end) -{ - unsigned ix; - basic_block bb; - - FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb) - { - rtx insn; - - for (insn = BB_HEAD (bb); - insn != NEXT_INSN (BB_END (bb)); - insn = NEXT_INSN (insn)) - { - if (!INSN_P (insn)) - continue; - if (insn == loop_end) - continue; - if (reg_mentioned_p (reg, PATTERN (insn))) - return true; - } - } - return false; -} - /* Estimate the length of INSN conservatively. */ static int @@ -3559,67 +3409,32 @@ length_for_loop (rtx insn) /* Optimize LOOP. */ -static void -bfin_optimize_loop (loop_info loop) +static bool +hwloop_optimize (hwloop_info loop) { basic_block bb; - loop_info inner; + hwloop_info inner; rtx insn, last_insn; rtx loop_init, start_label, end_label; - rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1; rtx iter_reg, scratchreg, scratch_init, scratch_init_insn; rtx lc_reg, lt_reg, lb_reg; rtx seq, seq_end; int length; unsigned ix; - int inner_depth = 0; - - if (loop->visited) - return; - - loop->visited = 1; - - if (loop->bad) - { - if (dump_file) - fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); - goto bad_loop; - } - - /* Every loop contains in its list of inner loops every loop nested inside - it, even if there are intermediate loops. This works because we're doing - a depth-first search here and never visit a loop more than once. */ - FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, inner) - { - bfin_optimize_loop (inner); - - if (!inner->bad && inner_depth < inner->depth) - { - inner_depth = inner->depth; + bool clobber0, clobber1; - loop->clobber_loop0 |= inner->clobber_loop0; - loop->clobber_loop1 |= inner->clobber_loop1; - } - } - - loop->depth = inner_depth + 1; if (loop->depth > MAX_LOOP_DEPTH) { if (dump_file) fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no); - goto bad_loop; + return false; } /* Get the loop iteration register. */ iter_reg = loop->iter_reg; - if (!REG_P (iter_reg)) - { - if (dump_file) - fprintf (dump_file, ";; loop %d iteration count not in a register\n", - loop->loop_no); - goto bad_loop; - } + gcc_assert (REG_P (iter_reg)); + scratchreg = NULL_RTX; scratch_init = iter_reg; scratch_init_insn = NULL_RTX; @@ -3680,7 +3495,7 @@ bfin_optimize_loop (loop_info loop) if (dump_file) fprintf (dump_file, ";; loop %d lsetup not before loop_start\n", loop->loop_no); - goto bad_loop; + return false; } /* Account for the pop of a scratch register where necessary. */ @@ -3692,7 +3507,7 @@ bfin_optimize_loop (loop_info loop) { if (dump_file) fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no); - goto bad_loop; + return false; } } @@ -3710,7 +3525,7 @@ bfin_optimize_loop (loop_info loop) if (dump_file) fprintf (dump_file, ";; loop %d start_label not before loop_end\n", loop->loop_no); - goto bad_loop; + return false; } loop->length = length; @@ -3718,58 +3533,29 @@ bfin_optimize_loop (loop_info loop) { if (dump_file) fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); - goto bad_loop; + return false; } /* Scan all the blocks to make sure they don't use iter_reg. */ - if (bfin_scan_loop (loop, iter_reg, loop->loop_end)) + if (loop->iter_reg_used || loop->iter_reg_used_outside) { if (dump_file) fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no); - goto bad_loop; - } - - /* Scan all the insns to see if the loop body clobber - any hardware loop registers. */ - - reg_lc0 = gen_rtx_REG (SImode, REG_LC0); - reg_lc1 = gen_rtx_REG (SImode, REG_LC1); - reg_lt0 = gen_rtx_REG (SImode, REG_LT0); - reg_lt1 = gen_rtx_REG (SImode, REG_LT1); - reg_lb0 = gen_rtx_REG (SImode, REG_LB0); - reg_lb1 = gen_rtx_REG (SImode, REG_LB1); - - FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb) - { - rtx insn; - - for (insn = BB_HEAD (bb); - insn != NEXT_INSN (BB_END (bb)); - insn = NEXT_INSN (insn)) - { - if (!INSN_P (insn)) - continue; - - if (reg_set_p (reg_lc0, insn) - || reg_set_p (reg_lt0, insn) - || reg_set_p (reg_lb0, insn)) - loop->clobber_loop0 = 1; - - if (reg_set_p (reg_lc1, insn) - || reg_set_p (reg_lt1, insn) - || reg_set_p (reg_lb1, insn)) - loop->clobber_loop1 |= 1; - } + return false; } - if ((loop->clobber_loop0 && loop->clobber_loop1) - || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0)) + clobber0 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB0) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT0)); + clobber1 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB1) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT1)); + if (clobber0 && clobber1) { - loop->depth = MAX_LOOP_DEPTH + 1; if (dump_file) fprintf (dump_file, ";; loop %d no loop reg available\n", loop->loop_no); - goto bad_loop; + return false; } /* There should be an instruction before the loop_end instruction @@ -3814,7 +3600,7 @@ bfin_optimize_loop (loop_info loop) if (dump_file) fprintf (dump_file, ";; loop %d has no last instruction\n", loop->loop_no); - goto bad_loop; + return false; } if (JUMP_P (last_insn) && !any_condjump_p (last_insn)) @@ -3822,7 +3608,7 @@ bfin_optimize_loop (loop_info loop) if (dump_file) fprintf (dump_file, ";; loop %d has bad last instruction\n", loop->loop_no); - goto bad_loop; + return false; } /* In all other cases, try to replace a bad last insn with a nop. */ else if (JUMP_P (last_insn) @@ -3838,7 +3624,7 @@ bfin_optimize_loop (loop_info loop) { if (dump_file) fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); - goto bad_loop; + return false; } if (dump_file) fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n", @@ -3854,19 +3640,19 @@ bfin_optimize_loop (loop_info loop) end_label = gen_label_rtx (); iter_reg = loop->iter_reg; - if (loop->depth == 1 && !loop->clobber_loop1) + if (loop->depth == 1 && !clobber1) { - lc_reg = reg_lc1; - lt_reg = reg_lt1; - lb_reg = reg_lb1; - loop->clobber_loop1 = 1; + lc_reg = gen_rtx_REG (SImode, REG_LC1); + lb_reg = gen_rtx_REG (SImode, REG_LB1); + lt_reg = gen_rtx_REG (SImode, REG_LT1); + SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1); } else { - lc_reg = reg_lc0; - lt_reg = reg_lt0; - lb_reg = reg_lb0; - loop->clobber_loop0 = 1; + lc_reg = gen_rtx_REG (SImode, REG_LC0); + lb_reg = gen_rtx_REG (SImode, REG_LB0); + lt_reg = gen_rtx_REG (SImode, REG_LT0); + SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0); } loop->end_label = end_label; @@ -4009,15 +3795,17 @@ bfin_optimize_loop (loop_info loop) /* Insert the loop end label before the last instruction of the loop. */ emit_label_before (loop->end_label, loop->last_insn); - return; - - bad_loop: - - if (dump_file) - fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); - - loop->bad = 1; + return true; +} +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the doloop_end pattern + into a subtract and a test. */ +static void +hwloop_fail (hwloop_info loop) +{ + rtx insn = loop->loop_end; + if (DPREG_P (loop->iter_reg)) { /* If loop->iter_reg is a DREG or PREG, we can split it here @@ -4039,369 +3827,39 @@ bfin_optimize_loop (loop_info loop) LABEL_NUSES (loop->start_label)++; delete_insn (loop->loop_end); } -} - -/* Called from bfin_reorg_loops when a potential loop end is found. LOOP is - a newly set up structure describing the loop, it is this function's - responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the - loop_end insn and its enclosing basic block. */ - -static void -bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn) -{ - unsigned dwork = 0; - basic_block bb; - VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20); - - loop->tail = tail_bb; - loop->head = BRANCH_EDGE (tail_bb)->dest; - loop->successor = FALLTHRU_EDGE (tail_bb)->dest; - loop->loop_end = tail_insn; - loop->last_insn = NULL_RTX; - loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1)); - loop->depth = loop->length = 0; - loop->visited = 0; - loop->clobber_loop0 = loop->clobber_loop1 = 0; - loop->outer = NULL; - loop->loops = NULL; - loop->incoming = VEC_alloc (edge, gc, 2); - loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0); - loop->end_label = NULL_RTX; - loop->bad = 0; - - VEC_safe_push (basic_block, heap, works, loop->head); - - while (VEC_iterate (basic_block, works, dwork++, bb)) - { - edge e; - edge_iterator ei; - if (bb == EXIT_BLOCK_PTR) - { - /* We've reached the exit block. The loop must be bad. */ - if (dump_file) - fprintf (dump_file, - ";; Loop is bad - reached exit block while scanning\n"); - loop->bad = 1; - break; - } - - if (!bitmap_set_bit (loop->block_bitmap, bb->index)) - continue; - - /* We've not seen this block before. Add it to the loop's - list and then add each successor to the work list. */ - - VEC_safe_push (basic_block, heap, loop->blocks, bb); - - if (bb != tail_bb) - { - FOR_EACH_EDGE (e, ei, bb->succs) - { - basic_block succ = EDGE_SUCC (bb, ei.index)->dest; - if (!REGNO_REG_SET_P (df_get_live_in (succ), - REGNO (loop->iter_reg))) - continue; - if (!VEC_space (basic_block, works, 1)) - { - if (dwork) - { - VEC_block_remove (basic_block, works, 0, dwork); - dwork = 0; - } - else - VEC_reserve (basic_block, heap, works, 1); - } - VEC_quick_push (basic_block, works, succ); - } - } - } - - /* Find the predecessor, and make sure nothing else jumps into this loop. */ - if (!loop->bad) + else { - int pass, retry; - FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb) - { - edge e; - edge_iterator ei; - FOR_EACH_EDGE (e, ei, bb->preds) - { - basic_block pred = e->src; - - if (!bfin_bb_in_loop (loop, pred)) - { - if (dump_file) - fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", - loop->loop_no, pred->index, - e->dest->index); - VEC_safe_push (edge, gc, loop->incoming, e); - } - } - } - - for (pass = 0, retry = 1; retry && pass < 2; pass++) - { - edge e; - edge_iterator ei; - bool first = true; - retry = 0; - - FOR_EACH_EDGE (e, ei, loop->incoming) - { - if (first) - { - loop->incoming_src = e->src; - loop->incoming_dest = e->dest; - first = false; - } - else - { - if (e->dest != loop->incoming_dest) - loop->incoming_dest = NULL; - if (e->src != loop->incoming_src) - loop->incoming_src = NULL; - } - if (loop->incoming_src == NULL && loop->incoming_dest == NULL) - { - if (pass == 0) - { - if (dump_file) - fprintf (dump_file, - ";; retrying loop %d with forwarder blocks\n", - loop->loop_no); - retry = 1; - break; - } - loop->bad = 1; - if (dump_file) - fprintf (dump_file, - ";; can't find suitable entry for loop %d\n", - loop->loop_no); - goto out; - } - } - if (retry) - { - retry = 0; - FOR_EACH_EDGE (e, ei, loop->incoming) - { - if (forwarder_block_p (e->src)) - { - edge e2; - edge_iterator ei2; - - if (dump_file) - fprintf (dump_file, - ";; Adding forwarder block %d to loop %d and retrying\n", - e->src->index, loop->loop_no); - VEC_safe_push (basic_block, heap, loop->blocks, e->src); - bitmap_set_bit (loop->block_bitmap, e->src->index); - FOR_EACH_EDGE (e2, ei2, e->src->preds) - VEC_safe_push (edge, gc, loop->incoming, e2); - VEC_unordered_remove (edge, loop->incoming, ei.index); - retry = 1; - break; - } - } - if (!retry) - { - if (dump_file) - fprintf (dump_file, ";; No forwarder blocks found\n"); - loop->bad = 1; - } - } - } + splitting_loops = 1; + try_split (PATTERN (insn), insn, 1); + splitting_loops = 0; } - - out: - VEC_free (basic_block, heap, works); } -/* Analyze the structure of the loops in the current function. Use STACK - for bitmap allocations. Returns all the valid candidates for hardware - loops found in this function. */ -static loop_info -bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file) -{ - loop_info loops = NULL; - loop_info loop; - basic_block bb; - bitmap tmp_bitmap; - int nloops = 0; +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a loop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ - /* Find all the possible loop tails. This means searching for every - loop_end instruction. For each one found, create a loop_info - structure and add the head block to the work list. */ - FOR_EACH_BB (bb) - { - rtx tail = BB_END (bb); - - while (GET_CODE (tail) == NOTE) - tail = PREV_INSN (tail); - - bb->aux = NULL; - - if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end) - { - rtx insn; - /* A possible loop end */ - - /* There's a degenerate case we can handle - an empty loop consisting - of only a back branch. Handle that by deleting the branch. */ - insn = BB_HEAD (BRANCH_EDGE (bb)->dest); - if (next_real_insn (insn) == tail) - { - if (dump_file) - { - fprintf (dump_file, ";; degenerate loop ending at\n"); - print_rtl_single (dump_file, tail); - } - delete_insn_and_edges (tail); - continue; - } - - loop = XNEW (struct loop_info_d); - loop->next = loops; - loops = loop; - loop->loop_no = nloops++; - loop->blocks = VEC_alloc (basic_block, heap, 20); - loop->block_bitmap = BITMAP_ALLOC (stack); - bb->aux = loop; - - if (dump_file) - { - fprintf (dump_file, ";; potential loop %d ending at\n", - loop->loop_no); - print_rtl_single (dump_file, tail); - } - - bfin_discover_loop (loop, bb, tail); - } - } - - tmp_bitmap = BITMAP_ALLOC (stack); - /* Compute loop nestings. */ - for (loop = loops; loop; loop = loop->next) - { - loop_info other; - if (loop->bad) - continue; - - for (other = loop->next; other; other = other->next) - { - if (other->bad) - continue; - - bitmap_and (tmp_bitmap, other->block_bitmap, loop->block_bitmap); - if (bitmap_empty_p (tmp_bitmap)) - continue; - if (bitmap_equal_p (tmp_bitmap, other->block_bitmap)) - { - other->outer = loop; - VEC_safe_push (loop_info, heap, loop->loops, other); - } - else if (bitmap_equal_p (tmp_bitmap, loop->block_bitmap)) - { - loop->outer = other; - VEC_safe_push (loop_info, heap, other->loops, loop); - } - else - { - if (dump_file) - fprintf (dump_file, - ";; can't find suitable nesting for loops %d and %d\n", - loop->loop_no, other->loop_no); - loop->bad = other->bad = 1; - } - } - } - BITMAP_FREE (tmp_bitmap); +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx pat, reg; - return loops; -} + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) + return NULL_RTX; -/* Free up the loop structures in LOOPS. */ -static void -free_loops (loop_info loops) -{ - while (loops) - { - loop_info loop = loops; - loops = loop->next; - VEC_free (loop_info, heap, loop->loops); - VEC_free (basic_block, heap, loop->blocks); - BITMAP_FREE (loop->block_bitmap); - XDELETE (loop); - } + pat = PATTERN (insn); + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; } -#define BB_AUX_INDEX(BB) ((intptr_t)(BB)->aux) - -/* The taken-branch edge from the loop end can actually go forward. Since the - Blackfin's LSETUP instruction requires that the loop end be after the loop - start, try to reorder a loop's basic blocks when we find such a case. */ -static void -bfin_reorder_loops (loop_info loops, FILE *dump_file) +static struct hw_doloop_hooks bfin_doloop_hooks = { - basic_block bb; - loop_info loop; - - FOR_EACH_BB (bb) - bb->aux = NULL; - cfg_layout_initialize (0); - - for (loop = loops; loop; loop = loop->next) - { - intptr_t index; - basic_block bb; - edge e; - edge_iterator ei; - - if (loop->bad) - continue; - - /* Recreate an index for basic blocks that represents their order. */ - for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0; - bb != EXIT_BLOCK_PTR; - bb = bb->next_bb, index++) - bb->aux = (PTR) index; - - if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail)) - continue; - - FOR_EACH_EDGE (e, ei, loop->head->succs) - { - if (bitmap_bit_p (loop->block_bitmap, e->dest->index) - && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) - { - basic_block start_bb = e->dest; - basic_block start_prev_bb = start_bb->prev_bb; - - if (dump_file) - fprintf (dump_file, ";; Moving block %d before block %d\n", - loop->head->index, start_bb->index); - loop->head->prev_bb->next_bb = loop->head->next_bb; - loop->head->next_bb->prev_bb = loop->head->prev_bb; - - loop->head->prev_bb = start_prev_bb; - loop->head->next_bb = start_bb; - start_prev_bb->next_bb = start_bb->prev_bb = loop->head; - break; - } - } - loops = loops->next; - } - - FOR_EACH_BB (bb) - { - if (bb->next_bb != EXIT_BLOCK_PTR) - bb->aux = bb->next_bb; - else - bb->aux = NULL; - } - cfg_layout_finalize (); - df_analyze (); -} + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; /* Run from machine_dependent_reorg, this pass looks for doloop_end insns and tries to rewrite the RTL of these loops so that proper Blackfin @@ -4410,62 +3868,7 @@ bfin_reorder_loops (loop_info loops, FILE *dump_file) static void bfin_reorg_loops (FILE *dump_file) { - loop_info loops = NULL; - loop_info loop; - basic_block bb; - bitmap_obstack stack; - - bitmap_obstack_initialize (&stack); - - if (dump_file) - fprintf (dump_file, ";; Find loops, first pass\n\n"); - - loops = bfin_discover_loops (&stack, dump_file); - - if (dump_file) - bfin_dump_loops (loops); - - bfin_reorder_loops (loops, dump_file); - free_loops (loops); - - if (dump_file) - fprintf (dump_file, ";; Find loops, second pass\n\n"); - - loops = bfin_discover_loops (&stack, dump_file); - if (dump_file) - { - fprintf (dump_file, ";; All loops found:\n\n"); - bfin_dump_loops (loops); - } - - /* Now apply the optimizations. */ - for (loop = loops; loop; loop = loop->next) - bfin_optimize_loop (loop); - - if (dump_file) - { - fprintf (dump_file, ";; After hardware loops optimization:\n\n"); - bfin_dump_loops (loops); - } - - free_loops (loops); - - if (dump_file) - print_rtl (dump_file, get_insns ()); - - FOR_EACH_BB (bb) - bb->aux = NULL; - - splitting_loops = 1; - FOR_EACH_BB (bb) - { - rtx insn = BB_END (bb); - if (!JUMP_P (insn)) - continue; - - try_split (PATTERN (insn), insn, 1); - } - splitting_loops = 0; + reorg_loops (true, &bfin_doloop_hooks); } /* Possibly generate a SEQUENCE out of three insns found in SLOT. diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md index a96d1a7bda6..8d8413de304 100644 --- a/gcc/config/bfin/bfin.md +++ b/gcc/config/bfin/bfin.md @@ -1993,7 +1993,7 @@ (const_int -1))) (unspec [(const_int 0)] UNSPEC_LSETUP_END) (clobber (match_scratch:SI 2 "=&r"))] - "splitting_loops" + "memory_operand (operands[0], SImode) || splitting_loops" [(set (match_dup 2) (match_dup 0)) (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1))) (set (match_dup 0) (match_dup 2)) diff --git a/gcc/hw-doloop.c b/gcc/hw-doloop.c new file mode 100644 index 00000000000..3d59200a84b --- /dev/null +++ b/gcc/hw-doloop.c @@ -0,0 +1,672 @@ +/* Code to analyze doloop loops in order for targets to perform late + optimizations converting doloops to other forms of hardware loops. + Copyright (C) 2011 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "expr.h" +#include "hard-reg-set.h" +#include "regs.h" +#include "basic-block.h" +#include "tm_p.h" +#include "df.h" +#include "cfglayout.h" +#include "cfgloop.h" +#include "output.h" +#include "recog.h" +#include "target.h" +#include "hw-doloop.h" + +#ifdef HAVE_doloop_end + +/* Dump information collected in LOOPS. */ +static void +dump_hwloops (hwloop_info loops) +{ + hwloop_info loop; + + for (loop = loops; loop; loop = loop->next) + { + hwloop_info i; + basic_block b; + unsigned ix; + + fprintf (dump_file, ";; loop %d: ", loop->loop_no); + if (loop->bad) + fprintf (dump_file, "(bad) "); + fprintf (dump_file, "{head:%d, depth:%d, reg:%u}", + loop->head == NULL ? -1 : loop->head->index, + loop->depth, REGNO (loop->iter_reg)); + + fprintf (dump_file, " blocks: [ "); + for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++) + fprintf (dump_file, "%d ", b->index); + fprintf (dump_file, "] "); + + fprintf (dump_file, " inner loops: [ "); + for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, i); ix++) + fprintf (dump_file, "%d ", i->loop_no); + fprintf (dump_file, "]\n"); + } + fprintf (dump_file, "\n"); +} + +/* Return true if BB is part of LOOP. */ +static bool +bb_in_loop_p (hwloop_info loop, basic_block bb) +{ + return bitmap_bit_p (loop->block_bitmap, bb->index); +} + +/* Scan the blocks of LOOP (and its inferiors), and record things such as + hard registers set, jumps out of and within the loop. */ +static void +scan_loop (hwloop_info loop) +{ + unsigned ix; + basic_block bb; + + if (loop->bad) + return; + + if (REGNO_REG_SET_P (df_get_live_in (loop->successor), + REGNO (loop->iter_reg))) + loop->iter_reg_used_outside = true; + + for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++) + { + rtx insn; + edge e; + edge_iterator ei; + + if (bb != loop->tail) + FOR_EACH_EDGE (e, ei, bb->succs) + { + if (bb_in_loop_p (loop, e->dest)) + { + if (!(e->flags & EDGE_FALLTHRU)) + loop->jumps_within = true; + } + else + { + loop->jumps_outof = true; + if (!loop->bad) + gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest), + REGNO (loop->iter_reg))); + } + } + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + df_ref *def_rec; + HARD_REG_SET set_this_insn; + + if (!INSN_P (insn)) + continue; + + if (recog_memoized (insn) < 0 + && (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0)) + loop->has_asm = true; + + CLEAR_HARD_REG_SET (set_this_insn); + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + { + rtx dreg = DF_REF_REG (*def_rec); + + if (!REG_P (dreg)) + continue; + + add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), + REGNO (dreg)); + } + + if (insn == loop->loop_end) + CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); + else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) + loop->iter_reg_used = true; + IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); + } + } +} + +/* Compute the incoming_dest and incoming_src members of LOOP by + identifying the edges that jump into the loop. If there is more + than one block that jumps into the loop, incoming_src will be set + to NULL; likewise, if there is more than one block in the loop that + is the destination of an incoming edge, incoming_dest will be NULL. + + Return true if either of these two fields is nonnull, false + otherwise. */ +static bool +process_incoming_edges (hwloop_info loop) +{ + edge e; + edge_iterator ei; + bool first = true; + + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (first) + { + loop->incoming_src = e->src; + loop->incoming_dest = e->dest; + first = false; + } + else + { + if (e->dest != loop->incoming_dest) + loop->incoming_dest = NULL; + if (e->src != loop->incoming_src) + loop->incoming_src = NULL; + } + } + if (loop->incoming_src == NULL && loop->incoming_dest == NULL) + return false; + + return true; +} + +/* Try to identify a forwarder block that jump into LOOP, and add it to + the set of blocks in the loop, updating the vector of incoming blocks as + well. This transformation gives a second chance to loops we couldn't + otherwise handle by increasing the chance that we'll end up with one + incoming_src block. + Return true if we made a change, false otherwise. */ +static bool +add_forwarder_blocks (hwloop_info loop) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (forwarder_block_p (e->src)) + { + edge e2; + edge_iterator ei2; + + if (dump_file) + fprintf (dump_file, + ";; Adding forwarder block %d to loop %d and retrying\n", + e->src->index, loop->loop_no); + VEC_safe_push (basic_block, heap, loop->blocks, e->src); + bitmap_set_bit (loop->block_bitmap, e->src->index); + FOR_EACH_EDGE (e2, ei2, e->src->preds) + VEC_safe_push (edge, gc, loop->incoming, e2); + VEC_unordered_remove (edge, loop->incoming, ei.index); + return true; + } + } + return false; +} + +/* Called from reorg_loops when a potential loop end is found. LOOP is + a newly set up structure describing the loop, it is this function's + responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the + loop_end insn and its enclosing basic block. REG is the loop counter + register. + For our purposes, a loop is defined by the set of blocks reachable from + the loop head in which the loop counter register is live. This matches + the expected use; targets that call into this code usually replace the + loop counter with a different special register. */ +static void +discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg) +{ + bool found_tail; + unsigned dwork = 0; + basic_block bb; + VEC (basic_block,heap) *works; + + loop->tail = tail_bb; + loop->loop_end = tail_insn; + loop->iter_reg = reg; + loop->incoming = VEC_alloc (edge, gc, 2); + loop->start_label = JUMP_LABEL (tail_insn); + + if (EDGE_COUNT (tail_bb->succs) != 2) + { + loop->bad = true; + return; + } + loop->head = BRANCH_EDGE (tail_bb)->dest; + loop->successor = FALLTHRU_EDGE (tail_bb)->dest; + + works = VEC_alloc (basic_block, heap, 20); + VEC_safe_push (basic_block, heap, works, loop->head); + + found_tail = false; + for (dwork = 0; VEC_iterate (basic_block, works, dwork, bb); dwork++) + { + edge e; + edge_iterator ei; + if (bb == EXIT_BLOCK_PTR) + { + /* We've reached the exit block. The loop must be bad. */ + if (dump_file) + fprintf (dump_file, + ";; Loop is bad - reached exit block while scanning\n"); + loop->bad = true; + break; + } + + if (bitmap_bit_p (loop->block_bitmap, bb->index)) + continue; + + /* We've not seen this block before. Add it to the loop's + list and then add each successor to the work list. */ + + VEC_safe_push (basic_block, heap, loop->blocks, bb); + bitmap_set_bit (loop->block_bitmap, bb->index); + + if (bb == tail_bb) + found_tail = true; + else + { + FOR_EACH_EDGE (e, ei, bb->succs) + { + basic_block succ = EDGE_SUCC (bb, ei.index)->dest; + if (REGNO_REG_SET_P (df_get_live_in (succ), + REGNO (loop->iter_reg))) + VEC_safe_push (basic_block, heap, works, succ); + } + } + } + + if (!found_tail) + loop->bad = true; + + /* Find the predecessor, and make sure nothing else jumps into this loop. */ + if (!loop->bad) + { + FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb) + { + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block pred = e->src; + + if (!bb_in_loop_p (loop, pred)) + { + if (dump_file) + fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", + loop->loop_no, pred->index, + e->dest->index); + VEC_safe_push (edge, gc, loop->incoming, e); + } + } + } + + if (!process_incoming_edges (loop)) + { + if (dump_file) + fprintf (dump_file, + ";; retrying loop %d with forwarder blocks\n", + loop->loop_no); + if (!add_forwarder_blocks (loop)) + { + if (dump_file) + fprintf (dump_file, ";; No forwarder blocks found\n"); + loop->bad = true; + } + else if (!process_incoming_edges (loop)) + { + if (dump_file) + fprintf (dump_file, + ";; can't find suitable entry for loop %d\n", + loop->loop_no); + } + } + } + + VEC_free (basic_block, heap, works); +} + +/* Analyze the structure of the loops in the current function. Use + STACK for bitmap allocations. Returns all the valid candidates for + hardware loops found in this function. HOOKS is the argument + passed to reorg_loops, used here to find the iteration registers + from a loop_end pattern. */ +static hwloop_info +discover_loops (bitmap_obstack *stack, struct hw_doloop_hooks *hooks) +{ + hwloop_info loops = NULL; + hwloop_info loop; + basic_block bb; + int nloops = 0; + + /* Find all the possible loop tails. This means searching for every + loop_end instruction. For each one found, create a hwloop_info + structure and add the head block to the work list. */ + FOR_EACH_BB (bb) + { + rtx tail = BB_END (bb); + rtx insn, reg; + + while (tail && GET_CODE (tail) == NOTE && tail != BB_HEAD (bb)) + tail = PREV_INSN (tail); + + if (tail == NULL_RTX) + continue; + + if (!JUMP_P (tail)) + continue; + reg = hooks->end_pattern_reg (tail); + if (reg == NULL_RTX) + continue; + + /* A possible loop end */ + + /* There's a degenerate case we can handle - an empty loop consisting + of only a back branch. Handle that by deleting the branch. */ + insn = JUMP_LABEL (tail); + while (insn && !NONDEBUG_INSN_P (insn)) + insn = NEXT_INSN (insn); + if (insn == tail) + { + basic_block succ = FALLTHRU_EDGE (bb)->dest; + if (dump_file) + { + fprintf (dump_file, ";; degenerate loop ending at\n"); + print_rtl_single (dump_file, tail); + } + if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg))) + { + if (dump_file) + fprintf (dump_file, ";; deleting it\n"); + delete_insn_and_edges (tail); + continue; + } + } + + loop = XCNEW (struct hwloop_info_d); + loop->next = loops; + loops = loop; + loop->loop_no = nloops++; + loop->blocks = VEC_alloc (basic_block, heap, 20); + loop->block_bitmap = BITMAP_ALLOC (stack); + + if (dump_file) + { + fprintf (dump_file, ";; potential loop %d ending at\n", + loop->loop_no); + print_rtl_single (dump_file, tail); + } + + discover_loop (loop, bb, tail, reg); + } + + /* Compute loop nestings. Given two loops A and B, either the sets + of their blocks don't intersect at all, or one is the subset of + the other, or the blocks don't form a good nesting structure. */ + for (loop = loops; loop; loop = loop->next) + { + hwloop_info other; + + if (loop->bad) + continue; + + for (other = loops; other; other = other->next) + { + if (other->bad) + continue; + + if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap)) + continue; + if (!bitmap_intersect_compl_p (other->block_bitmap, + loop->block_bitmap)) + VEC_safe_push (hwloop_info, heap, loop->loops, other); + else if (!bitmap_intersect_compl_p (loop->block_bitmap, + other->block_bitmap)) + VEC_safe_push (hwloop_info, heap, other->loops, loop); + else + { + if (dump_file) + fprintf (dump_file, + ";; can't find suitable nesting for loops %d and %d\n", + loop->loop_no, other->loop_no); + loop->bad = other->bad = true; + } + } + } + + if (dump_file) + dump_hwloops (loops); + + return loops; +} + +/* Free up the loop structures in LOOPS. */ +static void +free_loops (hwloop_info loops) +{ + while (loops) + { + hwloop_info loop = loops; + loops = loop->next; + VEC_free (hwloop_info, heap, loop->loops); + VEC_free (basic_block, heap, loop->blocks); + BITMAP_FREE (loop->block_bitmap); + XDELETE (loop); + } +} + +#define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux) + +/* Initialize the aux fields to give ascending indices to basic blocks. */ +static void +set_bb_indices (void) +{ + basic_block bb; + intptr_t index; + + index = 0; + FOR_EACH_BB (bb) + bb->aux = (void *) index++; +} + +/* The taken-branch edge from the loop end can actually go forward. + If the target's hardware loop support requires that the loop end be + after the loop start, try to reorder a loop's basic blocks when we + find such a case. + This is not very aggressive; it only moves at most one block. It + does not introduce new branches into loops; it may remove them, or + it may switch fallthru/jump edges. */ +static void +reorder_loops (hwloop_info loops) +{ + basic_block bb; + hwloop_info loop; + + cfg_layout_initialize (0); + + set_bb_indices (); + + for (loop = loops; loop; loop = loop->next) + { + edge e; + edge_iterator ei; + + if (loop->bad) + continue; + + if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail)) + continue; + + FOR_EACH_EDGE (e, ei, loop->head->succs) + { + if (bitmap_bit_p (loop->block_bitmap, e->dest->index) + && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) + { + basic_block start_bb = e->dest; + basic_block start_prev_bb = start_bb->prev_bb; + + if (dump_file) + fprintf (dump_file, ";; Moving block %d before block %d\n", + loop->head->index, start_bb->index); + loop->head->prev_bb->next_bb = loop->head->next_bb; + loop->head->next_bb->prev_bb = loop->head->prev_bb; + + loop->head->prev_bb = start_prev_bb; + loop->head->next_bb = start_bb; + start_prev_bb->next_bb = start_bb->prev_bb = loop->head; + + set_bb_indices (); + break; + } + } + loops = loops->next; + } + + FOR_EACH_BB (bb) + { + if (bb->next_bb != EXIT_BLOCK_PTR) + bb->aux = bb->next_bb; + else + bb->aux = NULL; + } + cfg_layout_finalize (); + clear_aux_for_blocks (); + df_analyze (); +} + +/* Call the OPT function for LOOP and all of its sub-loops. This is + done in a depth-first search; innermost loops are visited first. + OPTIMIZE and FAIL are the functions passed to reorg_loops by the + target's reorg pass. */ +static void +optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) +{ + int ix; + hwloop_info inner; + int inner_depth = 0; + + if (loop->visited) + return; + + loop->visited = 1; + + if (loop->bad) + { + if (dump_file) + fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); + goto bad_loop; + } + + /* Every loop contains in its list of inner loops every loop nested inside + it, even if there are intermediate loops. This works because we're doing + a depth-first search here and never visit a loop more than once. + Recursion depth is effectively limited by the number of available + hardware registers. */ + for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, inner); ix++) + { + optimize_loop (inner, hooks); + + if (!inner->bad && inner_depth < inner->depth) + inner_depth = inner->depth; + /* The set of registers may be changed while optimizing the inner + loop. */ + IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); + } + + loop->depth = inner_depth + 1; + + if (hooks->opt (loop)) + return; + + bad_loop: + if (dump_file) + fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); + + loop->bad = true; + hooks->fail (loop); +} + +/* This function can be used from a port's machine_dependent_reorg to + find and analyze loops that end in loop_end instructions. It uses + a set of function pointers in HOOKS to call back into the + target-specific functions to perform the actual machine-specific + transformations. + + Such transformations typically involve additional set-up + instructions before the loop, to define loop bounds or set up a + special loop counter register. + + DO_REORDER should be set to true if we should try to use the + reorder_loops function to ensure the loop end occurs after the loop + start. This is for use by targets where the loop hardware requires + this condition. + + HOOKS is used to pass in target specific hooks; see + hw-doloop.h. */ +void +reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks) +{ + hwloop_info loops = NULL; + hwloop_info loop; + bitmap_obstack stack; + + df_live_add_problem (); + df_live_set_all_dirty (); + df_analyze (); + + bitmap_obstack_initialize (&stack); + + if (dump_file) + fprintf (dump_file, ";; Find loops, first pass\n\n"); + + loops = discover_loops (&stack, hooks); + + if (do_reorder) + { + reorder_loops (loops); + free_loops (loops); + + if (dump_file) + fprintf (dump_file, ";; Find loops, second pass\n\n"); + + loops = discover_loops (&stack, hooks); + } + + for (loop = loops; loop; loop = loop->next) + scan_loop (loop); + + /* Now apply the optimizations. */ + for (loop = loops; loop; loop = loop->next) + optimize_loop (loop, hooks); + + if (dump_file) + { + fprintf (dump_file, ";; After hardware loops optimization:\n\n"); + dump_hwloops (loops); + } + + free_loops (loops); + + if (dump_file) + print_rtl (dump_file, get_insns ()); +} +#endif diff --git a/gcc/hw-doloop.h b/gcc/hw-doloop.h new file mode 100644 index 00000000000..006b67953b2 --- /dev/null +++ b/gcc/hw-doloop.h @@ -0,0 +1,157 @@ +/* Code to analyze doloop loops in order for targets to perform late + optimizations converting doloops to other forms of hardware loops. + Copyright (C) 2011 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* We need to keep a vector of loops */ +typedef struct hwloop_info_d *hwloop_info; +DEF_VEC_P (hwloop_info); +DEF_VEC_ALLOC_P (hwloop_info,heap); + +/* Information about a loop we have found (or are in the process of + finding). */ +struct GTY (()) hwloop_info_d +{ + /* loop number, for dumps */ + int loop_no; + + /* Next loop in the graph. */ + hwloop_info next; + + /* Vector of blocks only within the loop, including those within + inner loops. */ + VEC (basic_block, heap) *blocks; + + /* Same information in a bitmap. */ + bitmap block_bitmap; + + /* Vector of inner loops within this loop. Includes loops of every + nesting level. */ + VEC (hwloop_info, heap) *loops; + + /* All edges that jump into the loop. */ + VEC(edge, gc) *incoming; + + /* The ports currently using this infrastructure can typically + handle two cases: all incoming edges have the same destination + block, or all incoming edges have the same source block. These + two members are set to the common source or destination we found, + or NULL if different blocks were found. If both are NULL the + loop can't be optimized. */ + basic_block incoming_src; + basic_block incoming_dest; + + /* First block in the loop. This is the one branched to by the loop_end + insn. */ + basic_block head; + + /* Last block in the loop (the one with the loop_end insn). */ + basic_block tail; + + /* The successor block of the loop. This is the one the loop_end insn + falls into. */ + basic_block successor; + + /* The last instruction in the tail. */ + rtx last_insn; + + /* The loop_end insn. */ + rtx loop_end; + + /* The iteration register. */ + rtx iter_reg; + + /* The new label placed at the beginning of the loop. */ + rtx start_label; + + /* The new label placed at the end of the loop. */ + rtx end_label; + + /* The length of the loop. */ + int length; + + /* The nesting depth of the loop. Innermost loops are given a depth + of 1. Only successfully optimized doloops are counted; if an inner + loop was marked as bad, it does not increase the depth of its parent + loop. + This value is valid when the target's optimize function is called. */ + int depth; + + /* True if we can't optimize this loop. */ + bool bad; + + /* True if we have visited this loop during the optimization phase. */ + bool visited; + + /* The following values are collected before calling the target's optimize + function and are not valid earlier. */ + + /* Record information about control flow: whether the loop has calls + or asm statements, whether it has edges that jump out of the loop, + or edges that jump within the loop. */ + bool has_call; + bool has_asm; + bool jumps_within; + bool jumps_outof; + + /* True if there is an instruction other than the doloop_end which uses the + iteration register. */ + bool iter_reg_used; + /* True if the iteration register lives past the doloop instruction. */ + bool iter_reg_used_outside; + + /* Hard registers set at any point in the loop, except for the loop counter + register's set in the doloop_end instruction. */ + HARD_REG_SET regs_set_in_loop; +}; + +/* A set of hooks to be defined by a target that wants to use the reorg_loops + functionality. + + reorg_loops is intended to handle cases where special hardware loop + setup instructions are required before the loop, for example to set + up loop counter registers that are not exposed to the register + allocator, or to inform the hardware about loop bounds. + + reorg_loops performs analysis to discover loop_end patterns created + by the earlier loop-doloop pass, and sets up a hwloop_info + structure for each such insn it finds. It then tries to discover + the basic blocks containing the loop by tracking the lifetime of + the iteration register. + + If a valid loop can't be found, the FAIL function is called; + otherwise the OPT function is called for each loop, visiting + innermost loops first and ascending. */ +struct hw_doloop_hooks +{ + /* Examine INSN. If it is a suitable doloop_end pattern, return the + iteration register, which should be a single hard register. + Otherwise, return NULL_RTX. */ + rtx (*end_pattern_reg) (rtx insn); + /* Optimize LOOP. The target should perform any additional analysis + (e.g. checking that the loop isn't too long), and then perform + its transformations. Return true if successful, false if the + loop should be marked bad. If it returns false, the FAIL + function is called. */ + bool (*opt) (hwloop_info loop); + /* Handle a loop that was marked bad for any reason. This could be + used to split the doloop_end pattern. */ + void (*fail) (hwloop_info loop); +}; + +extern void reorg_loops (bool, struct hw_doloop_hooks *); |