diff options
author | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-02-08 14:29:00 +0000 |
---|---|---|
committer | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-02-08 14:29:00 +0000 |
commit | 6a606e3ca54b94355b76625a904ca6d45dc33cbf (patch) | |
tree | a5f2166463824a1a17e45cfd6cfe1c23ebc0c708 | |
parent | 17cb77de2f4983570c6defc432c9012fbb5ab9ef (diff) | |
download | gcc-6a606e3ca54b94355b76625a904ca6d45dc33cbf.tar.gz |
* cfgloop.h (fix_loop_placement, can_duplicate_loop_p,
duplicate_loop_to_header_edge, loopify, remove_path, split_loop_bb):
Declare.
(DLTHE_FLAG_UPDATE_FREQ): New.
* cfgloopmanip.c (duplicate_loop, duplicate_subloops, copy_loops_to,
loop_redirect_edge, loop_delete_branch_edge, copy_bbs, remove_bbs,
rpe_enum_p, find_path, alp_enum_p, add_loop, fix_loop_placements,
fix_bb_placement, fix_bb_placements, place_new_loop,
scale_loop_frequencies, scale_bbs_frequencies, record_exit_edges):
New static functions.
(fix_loop_placement, can_duplicate_loop_p,
duplicate_loop_to_header_edge, loopify, remove_path, split_loop_bb):
New functions.
* cfgloop.h (loop_optimizer_init, loop_optimizer_finalize,
unswitch_loops): Declare.
* loop-init.c: New file.
* loop-unswitch.c: New file.
* Makefile.in (loop-init.o, loop-unswitch.o): New.
* params.def (PARAM_MAX_UNSWITCH_INSNS, PARAM_MAX_UNSWITCH_LEVEL): New.
* toplev.c (DFI_loop2): New dump.
(flag_unswitch_loops): New.
(lang_independent_options): Add it.
(rest_of_compilation): Call new loop optimizer.
(parse_options_and_default_flags): Turn flag_unswitch_loops on with -O3.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@62578 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 28 | ||||
-rw-r--r-- | gcc/Makefile.in | 8 | ||||
-rw-r--r-- | gcc/cfgloop.h | 25 | ||||
-rw-r--r-- | gcc/cfgloopmanip.c | 1178 | ||||
-rw-r--r-- | gcc/loop-init.c | 116 | ||||
-rw-r--r-- | gcc/loop-unswitch.c | 412 | ||||
-rw-r--r-- | gcc/params.def | 11 | ||||
-rw-r--r-- | gcc/toplev.c | 40 |
8 files changed, 1810 insertions, 8 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9a3fdcab53d..418ff2a5529 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2003-02-08 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz> + + * cfgloop.h (fix_loop_placement, can_duplicate_loop_p, + duplicate_loop_to_header_edge, loopify, remove_path, split_loop_bb): + Declare. + (DLTHE_FLAG_UPDATE_FREQ): New. + * cfgloopmanip.c (duplicate_loop, duplicate_subloops, copy_loops_to, + loop_redirect_edge, loop_delete_branch_edge, copy_bbs, remove_bbs, + rpe_enum_p, find_path, alp_enum_p, add_loop, fix_loop_placements, + fix_bb_placement, fix_bb_placements, place_new_loop, + scale_loop_frequencies, scale_bbs_frequencies, record_exit_edges): + New static functions. + (fix_loop_placement, can_duplicate_loop_p, + duplicate_loop_to_header_edge, loopify, remove_path, split_loop_bb): + New functions. + + * cfgloop.h (loop_optimizer_init, loop_optimizer_finalize, + unswitch_loops): Declare. + * loop-init.c: New file. + * loop-unswitch.c: New file. + * Makefile.in (loop-init.o, loop-unswitch.o): New. + * params.def (PARAM_MAX_UNSWITCH_INSNS, PARAM_MAX_UNSWITCH_LEVEL): New. + * toplev.c (DFI_loop2): New dump. + (flag_unswitch_loops): New. + (lang_independent_options): Add it. + (rest_of_compilation): Call new loop optimizer. + (parse_options_and_default_flags): Turn flag_unswitch_loops on with -O3. + 2003-02-08 Kazu Hirata <kazu@cs.umass.edu> * config/h8300/clzsi2.c: New. 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 33745f9dcb2..48a12ce080d 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -768,7 +768,7 @@ C_OBJS = c-parse.o c-lang.o c-pretty-print.o $(C_AND_OBJC_OBJS) OBJS = alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ - cfgloopanal.o cfgloopmanip.o \ + cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o \ cfgrtl.o combine.o conflict.o convert.o cse.o cselib.o dbxout.o \ debug.o df.o diagnostic.o doloop.o dominance.o \ dwarf2asm.o dwarf2out.o dwarfout.o emit-rtl.o except.o explow.o \ @@ -1600,6 +1600,12 @@ cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H) cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H) +loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) gcov-io.h \ + gcov-iov.h $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h profile.h \ + coretypes.h $(TM_H) +loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \ + $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h params.h \ + output.h $(EXPR_H) coretypes.h $(TM_H) dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 0d4142fafee..c46602f790a 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -293,6 +293,7 @@ extern void cancel_loop PARAMS ((struct loops *, struct loop *)); extern void cancel_loop_tree PARAMS ((struct loops *, struct loop *)); extern basic_block loop_split_edge_with PARAMS ((edge, rtx, struct loops *)); +extern int fix_loop_placement PARAMS ((struct loop *)); enum { @@ -312,3 +313,27 @@ extern rtx count_loop_iterations PARAMS 
((struct loop_desc *, rtx, rtx)); extern bool just_once_each_iteration_p PARAMS ((struct loops *,struct loop *, basic_block)); extern unsigned expected_loop_iterations PARAMS ((const struct loop *)); + +/* Loop manipulation. */ +extern bool can_duplicate_loop_p PARAMS ((struct loop *loop)); + +#define DLTHE_FLAG_UPDATE_FREQ 1 /* Update frequencies in + duplicate_loop_to_header_edge. */ + +extern int duplicate_loop_to_header_edge PARAMS ((struct loop *, edge, + struct loops *, unsigned, + sbitmap, edge, edge *, + unsigned *, int)); +extern struct loop *loopify PARAMS ((struct loops *, edge, + edge, basic_block)); +extern bool remove_path PARAMS ((struct loops *, edge)); +extern edge split_loop_bb PARAMS ((struct loops *, basic_block, + rtx)); + +/* Loop optimizer initialization. */ +extern struct loops *loop_optimizer_init PARAMS ((FILE *)); +extern void loop_optimizer_finalize PARAMS ((struct loops *, FILE *)); + +/* Optimization passes. */ +extern void unswitch_loops PARAMS ((struct loops *)); + diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index e22cb12caf7..69cb63b21dd 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -29,13 +29,1175 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cfglayout.h" #include "output.h" +static struct loop * duplicate_loop PARAMS ((struct loops *, + struct loop *, struct loop *)); +static void duplicate_subloops PARAMS ((struct loops *, struct loop *, + struct loop *)); +static void copy_loops_to PARAMS ((struct loops *, struct loop **, + int, struct loop *)); +static void loop_redirect_edge PARAMS ((edge, basic_block)); +static bool loop_delete_branch_edge PARAMS ((edge)); +static void copy_bbs PARAMS ((basic_block *, int, edge, + edge, basic_block **, + struct loops *, edge *, + edge *, int)); +static void remove_bbs PARAMS ((dominance_info, basic_block *, + int)); +static bool rpe_enum_p PARAMS ((basic_block, void *)); +static int find_path PARAMS ((edge, dominance_info, + basic_block **)); 
+static bool alp_enum_p PARAMS ((basic_block, void *)); +static void add_loop PARAMS ((struct loops *, struct loop *)); +static void fix_loop_placements PARAMS ((struct loop *)); +static bool fix_bb_placement PARAMS ((struct loops *, basic_block)); +static void fix_bb_placements PARAMS ((struct loops *, basic_block)); +static void place_new_loop PARAMS ((struct loops *, struct loop *)); +static void scale_loop_frequencies PARAMS ((struct loop *, int, int)); +static void scale_bbs_frequencies PARAMS ((basic_block *, int, int, int)); +static void record_exit_edges PARAMS ((edge, basic_block *, int, + edge *, unsigned *, int)); static basic_block create_preheader PARAMS ((struct loop *, dominance_info, int)); +/* Splits basic block BB after INSN, returns created edge. Updates loops + and dominators. */ +edge +split_loop_bb (loops, bb, insn) + struct loops *loops; + basic_block bb; + rtx insn; +{ + edge e; + basic_block *dom_bbs; + int n_dom_bbs, i; + + /* Split the block. */ + e = split_block (bb, insn); + + /* Add dest to loop. */ + add_bb_to_loop (e->dest, e->src->loop_father); + + /* Fix dominators. */ + add_to_dominance_info (loops->cfg.dom, e->dest); + n_dom_bbs = get_dominated_by (loops->cfg.dom, e->src, &dom_bbs); + for (i = 0; i < n_dom_bbs; i++) + set_immediate_dominator (loops->cfg.dom, dom_bbs[i], e->dest); + free (dom_bbs); + set_immediate_dominator (loops->cfg.dom, e->dest, e->src); + + /* Take care of RBI. */ + alloc_aux_for_block (e->dest, sizeof (struct reorder_block_def)); + + return e; +} + +/* Checks whether basic block BB is dominated by RPE->DOM, where + RPE is passed through DATA. */ +struct rpe_data + { + basic_block dom; + dominance_info doms; + }; + +static bool +rpe_enum_p (bb, data) + basic_block bb; + void *data; +{ + struct rpe_data *rpe = data; + return dominated_by_p (rpe->doms, bb, rpe->dom); +} + +/* Remove basic blocks BBS from loop structure and dominance info, + and delete them afterwards. 
*/ +static void +remove_bbs (dom, bbs, nbbs) + dominance_info dom; + basic_block *bbs; + int nbbs; +{ + int i; + + for (i = 0; i < nbbs; i++) + { + remove_bb_from_loops (bbs[i]); + delete_from_dominance_info (dom, bbs[i]); + flow_delete_block (bbs[i]); + } +} + +/* Find path -- i.e. the basic blocks dominated by edge E and put them + into array BBS, that will be allocated large enough to contain them. + The number of basic blocks in the path is returned. */ +static int +find_path (e, doms, bbs) + edge e; + dominance_info doms; + basic_block **bbs; +{ + edge ae = NULL; + struct rpe_data rpe; + + if (e->dest->pred->pred_next) + { + for (ae = e->dest->pred; ae; ae = ae->pred_next) + if (ae != e && !dominated_by_p (doms, ae->src, e->dest)) + break; + } + if (ae) + { + /* The path is formed just by the edge. */ + *bbs = NULL; + return 0; + } + + /* Find bbs in the path. */ + rpe.dom = e->dest; + rpe.doms = doms; + *bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); + return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs, + n_basic_blocks, &rpe); +} + +/* Fix placement of basic block BB inside loop hierarchy stored in LOOPS -- + Let L be a loop to that BB belongs. Then every successor of BB must either + 1) belong to some superloop of loop L, or + 2) be a header of loop K such that K->outer is superloop of L + Returns true if we had to move BB into other loop to enforce this condition, + false if the placement of BB was already correct (provided that placements + of its successors are correct). 
*/ +static bool +fix_bb_placement (loops, bb) + struct loops *loops; + basic_block bb; +{ + edge e; + struct loop *loop = loops->tree_root, *act; + + for (e = bb->succ; e; e = e->succ_next) + { + if (e->dest == EXIT_BLOCK_PTR) + continue; + + act = e->dest->loop_father; + if (act->header == e->dest) + act = act->outer; + + if (flow_loop_nested_p (loop, act)) + loop = act; + } + + if (loop == bb->loop_father) + return false; + + remove_bb_from_loops (bb); + add_bb_to_loop (bb, loop); + + return true; +} + +/* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e. + enforce condition condition stated in description of fix_bb_placement. We + start from basic block FROM that had some of its successors removed, so that + his placement no longer has to be correct, and iteratively fix placement of + its predecessors that may change if placement of FROM changed. Also fix + placement of subloops of FROM->loop_father, that might also be altered due + to this change; the condition for them is simmilar, except that instead of + successors we consider edges coming out of the loops. */ +static void +fix_bb_placements (loops, from) + struct loops *loops; + basic_block from; +{ + sbitmap in_queue; + basic_block *queue, *qtop, *qbeg, *qend; + struct loop *base_loop; + edge e; + + /* We pass through blocks back-reachable from FROM, testing whether some + of their successors moved to outer loop. It may be necessary to + iterate several times, but it is finite, as we stop unless we move + the basic block up the loop structure. The whole story is a bit + more complicated due to presence of subloops, those are moved using + fix_loop_placement. */ + + base_loop = from->loop_father; + if (base_loop == loops->tree_root) + return; + + in_queue = sbitmap_alloc (last_basic_block); + sbitmap_zero (in_queue); + SET_BIT (in_queue, from->index); + /* Prevent us from going out of the base_loop. 
*/ + SET_BIT (in_queue, base_loop->header->index); + + queue = xcalloc (base_loop->num_nodes + 1, sizeof (basic_block)); + qtop = queue + base_loop->num_nodes + 1; + qbeg = queue; + qend = queue + 1; + *qbeg = from; + + while (qbeg != qend) + { + from = *qbeg; + qbeg++; + if (qbeg == qtop) + qbeg = queue; + RESET_BIT (in_queue, from->index); + + if (from->loop_father->header == from) + { + /* Subloop header, maybe move the loop upward. */ + if (!fix_loop_placement (from->loop_father)) + continue; + } + else + { + /* Ordinary basic block. */ + if (!fix_bb_placement (loops, from)) + continue; + } + + /* Something has changed, insert predecessors into queue. */ + for (e = from->pred; e; e = e->pred_next) + { + basic_block pred = e->src; + struct loop *nca; + + if (TEST_BIT (in_queue, pred->index)) + continue; + + /* If it is subloop, then it either was not moved, or + the path up the loop tree from base_loop do not contain + it. */ + nca = find_common_loop (pred->loop_father, base_loop); + if (pred->loop_father != base_loop + && (nca == base_loop + || nca != pred->loop_father)) + pred = pred->loop_father->header; + else if (!flow_loop_nested_p (from->loop_father, pred->loop_father)) + { + /* No point in processing it. */ + continue; + } + + if (TEST_BIT (in_queue, pred->index)) + continue; + + /* Schedule the basic block. */ + *qend = pred; + qend++; + if (qend == qtop) + qend = queue; + SET_BIT (in_queue, pred->index); + } + } + free (in_queue); + free (queue); +} + +/* Removes path beginning at edge E, i.e. remove basic blocks dominated by E + and update loop structure stored in LOOPS and dominators. Return true if + we were able to remove the path, false otherwise (and nothing is affected + then). */ +bool +remove_path (loops, e) + struct loops *loops; + edge e; +{ + edge ae; + basic_block *rem_bbs, *bord_bbs, *dom_bbs, from, bb; + int i, nrem, n_bord_bbs, n_dom_bbs; + sbitmap seen; + + /* First identify the path. 
*/ + nrem = find_path (e, loops->cfg.dom, &rem_bbs); + + n_bord_bbs = 0; + bord_bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); + seen = sbitmap_alloc (last_basic_block); + sbitmap_zero (seen); + + /* Find "border" hexes -- i.e. those with predecessor in removed path. */ + for (i = 0; i < nrem; i++) + SET_BIT (seen, rem_bbs[i]->index); + if (nrem) + { + for (i = 0; i < nrem; i++) + { + bb = rem_bbs[i]; + for (ae = rem_bbs[i]->succ; ae; ae = ae->succ_next) + if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index)) + { + SET_BIT (seen, ae->dest->index); + bord_bbs[n_bord_bbs++] = ae->dest; + } + } + } + else if (e->dest != EXIT_BLOCK_PTR) + bord_bbs[n_bord_bbs++] = e->dest; + + /* Remove the path. */ + from = e->src; + if (!loop_delete_branch_edge (e)) + { + free (rem_bbs); + free (bord_bbs); + free (seen); + return false; + } + dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); + + /* Cancel loops contained in the path. */ + for (i = 0; i < nrem; i++) + if (rem_bbs[i]->loop_father->header == rem_bbs[i]) + cancel_loop_tree (loops, rem_bbs[i]->loop_father); + + remove_bbs (loops->cfg.dom, rem_bbs, nrem); + free (rem_bbs); + + /* Find blocks with whose dominators may be affected. */ + n_dom_bbs = 0; + sbitmap_zero (seen); + for (i = 0; i < n_bord_bbs; i++) + { + int j, nldom; + basic_block *ldom; + + bb = get_immediate_dominator (loops->cfg.dom, bord_bbs[i]); + if (TEST_BIT (seen, bb->index)) + continue; + SET_BIT (seen, bb->index); + + nldom = get_dominated_by (loops->cfg.dom, bb, &ldom); + for (j = 0; j < nldom; j++) + if (!dominated_by_p (loops->cfg.dom, from, ldom[j])) + dom_bbs[n_dom_bbs++] = ldom[j]; + free(ldom); + } + + free (bord_bbs); + free (seen); + + /* Recount dominators. */ + iterate_fix_dominators (loops->cfg.dom, dom_bbs, n_dom_bbs); + free (dom_bbs); + + /* Fix placements of basic blocks inside loops and the placement of + loops in the loop tree. 
*/ + fix_bb_placements (loops, from); + fix_loop_placements (from->loop_father); + + return true; +} + +/* Predicate for enumeration in add_loop. */ +static bool +alp_enum_p (bb, alp_header) + basic_block bb; + void *alp_header; +{ + return bb != (basic_block) alp_header; +} + +/* Given LOOP structure with filled header and latch, find the body of the + corresponding loop and add it to LOOPS tree. */ +static void +add_loop (loops, loop) + struct loops *loops; + struct loop *loop; +{ + basic_block *bbs; + int i, n; + + /* Add it to loop structure. */ + place_new_loop (loops, loop); + loop->level = 1; + + /* Find its nodes. */ + bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); + n = dfs_enumerate_from (loop->latch, 1, alp_enum_p, + bbs, n_basic_blocks, loop->header); + + for (i = 0; i < n; i++) + add_bb_to_loop (bbs[i], loop); + add_bb_to_loop (loop->header, loop); + + free (bbs); +} + +/* Multiply all frequencies of basic blocks in array BBS of lenght NBBS + by NUM/DEN. */ +static void +scale_bbs_frequencies (bbs, nbbs, num, den) + basic_block *bbs; + int nbbs; + int num; + int den; +{ + int i; + edge e; + + for (i = 0; i < nbbs; i++) + { + bbs[i]->frequency = (bbs[i]->frequency * num) / den; + bbs[i]->count = (bbs[i]->count * num) / den; + for (e = bbs[i]->succ; e; e = e->succ_next) + e->count = (e->count * num) /den; + } +} + +/* Multiply all frequencies in LOOP by NUM/DEN. */ +static void +scale_loop_frequencies (loop, num, den) + struct loop *loop; + int num; + int den; +{ + basic_block *bbs; + + bbs = get_loop_body (loop); + scale_bbs_frequencies (bbs, loop->num_nodes, num, den); + free (bbs); +} + +/* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting + latch to header and update loop tree stored in LOOPS and dominators + accordingly. Everything between them plus LATCH_EDGE destination must + be dominated by HEADER_EDGE destination, and back-reachable from + LATCH_EDGE source. 
HEADER_EDGE is redirected to basic block SWITCH_BB, + SWITCH_BB->succ to original destination of LATCH_EDGE and + SWITCH_BB->succ->succ_next to original destination of HEADER_EDGE. + Returns newly created loop. */ +struct loop * +loopify (loops, latch_edge, header_edge, switch_bb) + struct loops *loops; + edge latch_edge; + edge header_edge; + basic_block switch_bb; +{ + basic_block succ_bb = latch_edge->dest; + basic_block pred_bb = header_edge->src; + basic_block *dom_bbs, *body; + unsigned n_dom_bbs, i, j; + sbitmap seen; + struct loop *loop = xcalloc (1, sizeof (struct loop)); + struct loop *outer = succ_bb->loop_father->outer; + int freq, prob, tot_prob; + gcov_type cnt; + edge e; + + loop->header = header_edge->dest; + loop->latch = latch_edge->src; + + freq = EDGE_FREQUENCY (header_edge); + cnt = header_edge->count; + prob = switch_bb->succ->probability; + tot_prob = prob + switch_bb->succ->succ_next->probability; + if (tot_prob == 0) + tot_prob = 1; + + /* Redirect edges. */ + loop_redirect_edge (latch_edge, loop->header); + loop_redirect_edge (header_edge, switch_bb); + loop_redirect_edge (switch_bb->succ->succ_next, loop->header); + loop_redirect_edge (switch_bb->succ, succ_bb); + + /* Update dominators. */ + set_immediate_dominator (loops->cfg.dom, switch_bb, pred_bb); + set_immediate_dominator (loops->cfg.dom, loop->header, switch_bb); + set_immediate_dominator (loops->cfg.dom, succ_bb, switch_bb); + + /* Compute new loop. */ + add_loop (loops, loop); + flow_loop_tree_node_add (outer, loop); + + /* Add switch_bb to appropriate loop. */ + add_bb_to_loop (switch_bb, outer); + + /* Fix frequencies. */ + switch_bb->frequency = freq; + switch_bb->count = cnt; + for (e = switch_bb->succ; e; e = e->succ_next) + e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE; + scale_loop_frequencies (loop, prob, tot_prob); + scale_loop_frequencies (succ_bb->loop_father, tot_prob - prob, tot_prob); + + /* Update dominators of blocks outside of LOOP. 
*/ + dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); + n_dom_bbs = 0; + seen = sbitmap_alloc (last_basic_block); + sbitmap_zero (seen); + body = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes; i++) + SET_BIT (seen, body[i]->index); + + for (i = 0; i < loop->num_nodes; i++) + { + unsigned nldom; + basic_block *ldom; + + nldom = get_dominated_by (loops->cfg.dom, body[i], &ldom); + for (j = 0; j < nldom; j++) + if (!TEST_BIT (seen, ldom[j]->index)) + { + SET_BIT (seen, ldom[j]->index); + dom_bbs[n_dom_bbs++] = ldom[j]; + } + free (ldom); + } + + iterate_fix_dominators (loops->cfg.dom, dom_bbs, n_dom_bbs); + + free (body); + free (seen); + free (dom_bbs); + + return loop; +} + +/* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop + FATHER of LOOP such that all of the edges comming out of LOOP belong to + FATHER, and set it as outer loop of LOOP. Return 1 if placement of + LOOP changed. */ +int +fix_loop_placement (loop) + struct loop *loop; +{ + basic_block *body; + unsigned i; + edge e; + struct loop *father = loop->pred[0], *act; + + body = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + for (e = body[i]->succ; e; e = e->succ_next) + if (!flow_bb_inside_loop_p (loop, e->dest)) + { + act = find_common_loop (loop, e->dest->loop_father); + if (flow_loop_nested_p (father, act)) + father = act; + } + free (body); + + if (father != loop->outer) + { + for (act = loop->outer; act != father; act = act->outer) + act->num_nodes -= loop->num_nodes; + flow_loop_tree_node_remove (loop); + flow_loop_tree_node_add (father, loop); + return 1; + } + return 0; +} + +/* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that + condition stated in description of fix_loop_placement holds for them. + It is used in case when we removed some edges coming out of LOOP, which + may cause the right placement of LOOP inside loop tree to change. 
*/ +static void +fix_loop_placements (loop) + struct loop *loop; +{ + struct loop *outer; + + while (loop->outer) + { + outer = loop->outer; + if (!fix_loop_placement (loop)) + break; + loop = outer; + } +} + +/* Creates place for a new LOOP in LOOPS structure. */ +static void +place_new_loop (loops, loop) + struct loops *loops; + struct loop *loop; +{ + loops->parray = + xrealloc (loops->parray, (loops->num + 1) * sizeof (struct loop *)); + loops->parray[loops->num] = loop; + + loop->num = loops->num++; +} + +/* Copies copy of LOOP as subloop of TARGET loop, placing newly + created loop into LOOPS structure. */ +static struct loop * +duplicate_loop (loops, loop, target) + struct loops *loops; + struct loop *loop; + struct loop *target; +{ + struct loop *cloop; + cloop = xcalloc (1, sizeof (struct loop)); + place_new_loop (loops, cloop); + + /* Initialize copied loop. */ + cloop->level = loop->level; + + /* Set it as copy of loop. */ + loop->copy = cloop; + + /* Add it to target. */ + flow_loop_tree_node_add (target, cloop); + + return cloop; +} + +/* Copies structure of subloops of LOOP into TARGET loop, placing + newly created loops into loop tree stored in LOOPS. */ +static void +duplicate_subloops (loops, loop, target) + struct loops *loops; + struct loop *loop; + struct loop *target; +{ + struct loop *aloop, *cloop; + + for (aloop = loop->inner; aloop; aloop = aloop->next) + { + cloop = duplicate_loop (loops, aloop, target); + duplicate_subloops (loops, aloop, cloop); + } +} + +/* Copies structure of subloops of N loops, stored in array COPIED_LOOPS, + into TARGET loop, placing newly created loops into loop tree LOOPS. 
*/ +static void +copy_loops_to (loops, copied_loops, n, target) + struct loops *loops; + struct loop **copied_loops; + int n; + struct loop *target; +{ + struct loop *aloop; + int i; + + for (i = 0; i < n; i++) + { + aloop = duplicate_loop (loops, copied_loops[i], target); + duplicate_subloops (loops, copied_loops[i], aloop); + } +} + +/* Redirects edge E to basic block DEST. */ +static void +loop_redirect_edge (e, dest) + edge e; + basic_block dest; +{ + if (e->dest == dest) + return; + + cfg_layout_redirect_edge (e, dest); +} + +/* Deletes edge E from a branch if possible. */ +static bool +loop_delete_branch_edge (e) + edge e; +{ + basic_block src = e->src; + + if (src->succ->succ_next) + { + basic_block newdest; + /* Cannot handle more than two exit edges. */ + if (src->succ->succ_next->succ_next) + return false; + /* And it must be just a simple branch. */ + if (!any_condjump_p (src->end)) + return false; + + newdest = (e == src->succ + ? src->succ->succ_next->dest : src->succ->dest); + if (newdest == EXIT_BLOCK_PTR) + return false; + + return cfg_layout_redirect_edge (e, newdest); + } + else + { + /* Cannot happen -- we are using this only to remove an edge + from branch. */ + abort (); + } + + return false; /* To avoid warning, cannot get here. */ +} + +/* Duplicates N basic blocks stored in array BBS (they form a body of + duplicated loop). Newly created basic blocks are placed into array NEW_BBS + that we allocate. Edges from basic blocks in BBS are also duplicated and + copies of those of them that lead into BBS are redirected to appropriate + newly created block. The function also assigns bbs into loops and updates + dominators. If ADD_IRREDUCIBLE_FLAG is set, newly created basic blocks that + are not members of any inner loop are marked irreducible. + + Additionally, we perform following manipulation with edges: + We have two special edges given. 
LATCH_EDGE is the latch edge of the + duplicated loop and leads into its header (one of blocks in BBS); + it does not have neccessarily lead from one of the blocks, because + we may be copying the loop body several times in unrolling. + Edge ENTRY leads also leads to header, and it is either latch or entry + edge. Copy of LATCH_EDGE is redirected to header and is stored in + HEADER_EDGE, the ENTRY edge is redirected into copy of header and + returned as COPY_HEADER_EDGE. The effect is following: + if LATCH_EDGE == ENTRY, then the loop is unrolled by one copy, + HEADER_EDGE is latch of a new loop, COPY_HEADER_EDGE leads from original + latch source to first block in copy. + if LATCH_EDGE != ENTRY, then the loop is peeled by one copy, + HEADER_EDGE is entry edge of the loop, COPY_HEADER_EDGE leads from + original entry block to first block in peeled copy. + */ +static void +copy_bbs (bbs, n, entry, latch_edge, new_bbs, loops, header_edge, copy_header_edge, add_irreducible_flag) + basic_block *bbs; + int n; + edge entry; + edge latch_edge; + basic_block **new_bbs; + struct loops *loops; + edge *header_edge; + edge *copy_header_edge; + int add_irreducible_flag; +{ + int i; + basic_block bb, new_bb, header = entry->dest, dom_bb; + edge e; + + /* Duplicate bbs, update dominators, assign bbs to loops. */ + (*new_bbs) = xcalloc (n, sizeof (basic_block)); + for (i = 0; i < n; i++) + { + /* Duplicate. */ + bb = bbs[i]; + new_bb = (*new_bbs)[i] = cfg_layout_duplicate_bb (bb, NULL); + RBI (new_bb)->duplicated = 1; + /* Add to loop. */ + add_bb_to_loop (new_bb, bb->loop_father->copy); + add_to_dominance_info (loops->cfg.dom, new_bb); + /* Possibly set header. */ + if (bb->loop_father->header == bb && bb != header) + new_bb->loop_father->header = new_bb; + /* Or latch. */ + if (bb->loop_father->latch == bb && + bb->loop_father != header->loop_father) + new_bb->loop_father->latch = new_bb; + /* Take care of irreducible loops. 
*/ + if (add_irreducible_flag + && bb->loop_father == header->loop_father) + new_bb->flags |= BB_IRREDUCIBLE_LOOP; + } + + /* Set dominators. */ + for (i = 0; i < n; i++) + { + bb = bbs[i]; + new_bb = (*new_bbs)[i]; + if (bb != header) + { + /* For anything else than loop header, just copy it. */ + dom_bb = get_immediate_dominator (loops->cfg.dom, bb); + dom_bb = RBI (dom_bb)->copy; + } + else + { + /* Copy of header is dominated by entry source. */ + dom_bb = entry->src; + } + if (!dom_bb) + abort (); + set_immediate_dominator (loops->cfg.dom, new_bb, dom_bb); + } + + /* Redirect edges. */ + for (i = 0; i < n; i++) + { + edge e_pred; + new_bb = (*new_bbs)[i]; + bb = bbs[i]; + for (e = bb->pred; e; e = e_pred) + { + basic_block src = e->src; + + e_pred = e->pred_next; + + if (!RBI (src)->duplicated) + continue; + + /* Leads to copied loop and it is not latch edge, redirect it. */ + if (bb != header) + loop_redirect_edge (e, new_bb); + } + } + + /* Redirect header edge. */ + bb = RBI (latch_edge->src)->copy; + for (e = bb->succ; e->dest != latch_edge->dest; e = e->succ_next); + *header_edge = e; + loop_redirect_edge (*header_edge, header); + + /* Redirect entry to copy of header. */ + loop_redirect_edge (entry, RBI (header)->copy); + *copy_header_edge = entry; + + /* Clear information about duplicates. */ + for (i = 0; i < n; i++) + RBI ((*new_bbs)[i])->duplicated = 0; +} + +/* Check whether LOOP's body can be duplicated. */ +bool +can_duplicate_loop_p (loop) + struct loop *loop; +{ + basic_block *bbs; + unsigned i; + + bbs = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes; i++) + { + edge e; + + /* In case loop contains abnormal edge we can not redirect, + we can't perform duplication. 
*/ + + for (e = bbs[i]->succ; e; e = e->succ_next) + if ((e->flags & EDGE_ABNORMAL) + && flow_bb_inside_loop_p (loop, e->dest)) + { + free (bbs); + return false; + } + + if (!cfg_layout_can_duplicate_bb_p (bbs[i])) + { + free (bbs); + return false; + } + } + free (bbs); + + return true; +} + +/* Record edges, leading from NBBS basic blocks stored in BBS, that were created + by copying ORIG edge (or just ORIG edge if IS_ORIG is set). + If ORIG is NULL, then record all edges coming outside of BBS. Store them + into TO_REMOVE array that must be large enough to hold them all; their + number is returned in N_TO_REMOVE. */ +static void +record_exit_edges (orig, bbs, nbbs, to_remove, n_to_remove, is_orig) + edge orig; + basic_block *bbs; + int nbbs; + edge *to_remove; + unsigned *n_to_remove; + int is_orig; +{ + sbitmap my_blocks; + int i; + edge e; + + if (orig) + { + if (is_orig) + { + to_remove[(*n_to_remove)++] = orig; + return; + } + + for (e = RBI (orig->src)->copy->succ; e; e = e->succ_next) + if (e->dest == orig->dest) + break; + if (!e) + abort (); + + to_remove[(*n_to_remove)++] = e; + } + else + { + my_blocks = sbitmap_alloc (last_basic_block); + sbitmap_zero (my_blocks); + for (i = 0; i < nbbs; i++) + SET_BIT (my_blocks, bbs[i]->index); + + for (i = 0; i < nbbs; i++) + for (e = bbs[i]->succ; e; e = e->succ_next) + if (e->dest == EXIT_BLOCK_PTR || + !TEST_BIT (my_blocks, e->dest->index)) + to_remove[(*n_to_remove)++] = e; + + free (my_blocks); + } +} + + +#define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) + +/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of + updating LOOPS structure and dominators. E's destination must be LOOP + header for this to work, i.e. it must be entry or latch edge of this loop; + these are unique, as the loops must have preheaders for this function to + work correctly (in case E is latch, the function unrolls the loop, if E is + entry edge, it peels the loop). 
Store edges created by copying ORIG edge + (if NULL, then all edges leaving loop) from copies corresponding to set + bits in WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the + other copies are numbered in order given by control flow through them) + into TO_REMOVE array. Returns false if duplication is impossible. */ +int +duplicate_loop_to_header_edge (loop, e, loops, ndupl, wont_exit, orig, + to_remove, n_to_remove, flags) + struct loop *loop; + edge e; + struct loops *loops; + unsigned ndupl; + sbitmap wont_exit; + edge orig; + edge *to_remove; + unsigned *n_to_remove; + int flags; +{ + struct loop *target, *aloop; + struct loop **orig_loops; + unsigned n_orig_loops; + basic_block header = loop->header, latch = loop->latch; + basic_block *new_bbs, *bbs, *first_active; + basic_block new_bb, bb, first_active_latch = NULL; + edge ae, latch_edge, he; + unsigned i, j, n; + int is_latch = (latch == e->src); + int scale_act = 0, *scale_step = NULL, scale_main = 0; + int p, freq_in, freq_le, freq_out_orig; + int prob_pass_thru, prob_pass_wont_exit, prob_pass_main; + int add_irreducible_flag; + + if (e->dest != loop->header) + abort (); + if (ndupl <= 0) + abort (); + + if (orig) + { + /* Orig must be edge out of the loop. */ + if (!flow_bb_inside_loop_p (loop, orig->src)) + abort (); + if (flow_bb_inside_loop_p (loop, orig->dest)) + abort (); + } + + bbs = get_loop_body (loop); + + /* Check whether duplication is possible. */ + + for (i = 0; i < loop->num_nodes; i++) + { + if (!cfg_layout_can_duplicate_bb_p (bbs[i])) + { + free (bbs); + return false; + } + } + + add_irreducible_flag = !is_latch && (e->src->flags & BB_IRREDUCIBLE_LOOP); + + /* Find edge from latch. */ + latch_edge = loop_latch_edge (loop); + + if (flags & DLTHE_FLAG_UPDATE_FREQ) + { + /* Calculate coefficients by that we have to scale frequencies + of duplicated loop bodies. 
*/ + freq_in = header->frequency; + freq_le = EDGE_FREQUENCY (latch_edge); + if (freq_in == 0) + freq_in = 1; + if (freq_in < freq_le) + freq_in = freq_le; + freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le; + if (freq_out_orig > freq_in - freq_le) + freq_out_orig = freq_in - freq_le; + prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in); + prob_pass_wont_exit = + RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in); + + scale_step = xmalloc (ndupl * sizeof (int)); + + for (i = 1; i <= ndupl; i++) + scale_step[i - 1] = TEST_BIT (wont_exit, i) + ? prob_pass_wont_exit + : prob_pass_thru; + + if (is_latch) + { + prob_pass_main = TEST_BIT (wont_exit, 0) + ? prob_pass_wont_exit + : prob_pass_thru; + p = prob_pass_main; + scale_main = REG_BR_PROB_BASE; + for (i = 0; i < ndupl; i++) + { + scale_main += p; + p = RDIV (p * scale_step[i], REG_BR_PROB_BASE); + } + scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main); + scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE); + } + else + { + scale_main = REG_BR_PROB_BASE; + for (i = 0; i < ndupl; i++) + scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE); + scale_act = REG_BR_PROB_BASE - prob_pass_thru; + } + for (i = 0; i < ndupl; i++) + if (scale_step[i] < 0 || scale_step[i] > REG_BR_PROB_BASE) + abort (); + if (scale_main < 0 || scale_main > REG_BR_PROB_BASE + || scale_act < 0 || scale_act > REG_BR_PROB_BASE) + abort (); + } + + /* Loop the new bbs will belong to. */ + target = find_common_loop (e->src->loop_father, e->dest->loop_father); + + /* Original loops. */ + n_orig_loops = 0; + for (aloop = loop->inner; aloop; aloop = aloop->next) + n_orig_loops++; + orig_loops = xcalloc (n_orig_loops, sizeof (struct loop *)); + for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++) + orig_loops[i] = aloop; + + loop->copy = target; + + /* Original basic blocks. 
*/ + n = loop->num_nodes; + + first_active = xcalloc(n, sizeof (basic_block)); + if (is_latch) + { + memcpy (first_active, bbs, n * sizeof (basic_block)); + first_active_latch = latch; + } + + /* Record exit edges in original loop body. */ + if (TEST_BIT (wont_exit, 0)) + record_exit_edges (orig, bbs, n, to_remove, n_to_remove, true); + + for (j = 0; j < ndupl; j++) + { + /* Copy loops. */ + copy_loops_to (loops, orig_loops, n_orig_loops, target); + + /* Copy bbs. */ + copy_bbs (bbs, n, e, latch_edge, &new_bbs, loops, + &e, &he, add_irreducible_flag); + if (is_latch) + loop->latch = RBI (latch)->copy; + + /* Record exit edges in this copy. */ + if (TEST_BIT (wont_exit, j + 1)) + record_exit_edges (orig, new_bbs, n, to_remove, n_to_remove, false); + + /* Set counts and frequencies. */ + for (i = 0; i < n; i++) + { + new_bb = new_bbs[i]; + bb = bbs[i]; + + if (flags & DLTHE_FLAG_UPDATE_FREQ) + { + new_bb->count = RDIV (scale_act * bb->count, REG_BR_PROB_BASE); + new_bb->frequency = RDIV (scale_act * bb->frequency, + REG_BR_PROB_BASE); + } + else + { + new_bb->count = bb->count; + new_bb->frequency = bb->frequency; + } + + for (ae = new_bb->succ; ae; ae = ae->succ_next) + ae->count = RDIV (new_bb->count * ae->probability, + REG_BR_PROB_BASE); + } + if (flags & DLTHE_FLAG_UPDATE_FREQ) + scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE); + + if (!first_active_latch) + { + memcpy (first_active, new_bbs, n * sizeof (basic_block)); + first_active_latch = RBI (latch)->copy; + } + + free (new_bbs); + + /* Original loop header is dominated by latch copy + if we duplicated on its only entry edge. */ + if (!is_latch && !header->pred->pred_next->pred_next) + set_immediate_dominator (loops->cfg.dom, header, RBI (latch)->copy); + if (is_latch && j == 0) + { + /* Update edge from latch. */ + for (latch_edge = RBI (header)->copy->pred; + latch_edge->src != latch; + latch_edge = latch_edge->pred_next); + } + } + /* Now handle original loop. */ + + /* Update edge counts. 
*/ + if (flags & DLTHE_FLAG_UPDATE_FREQ) + { + for (i = 0; i < n; i++) + { + bb = bbs[i]; + bb->count = RDIV (scale_main * bb->count, REG_BR_PROB_BASE); + bb->frequency = RDIV (scale_main * bb->frequency, REG_BR_PROB_BASE); + for (ae = bb->succ; ae; ae = ae->succ_next) + ae->count = RDIV (bb->count * ae->probability, REG_BR_PROB_BASE); + } + free (scale_step); + } + free (orig_loops); + + /* Update dominators of other blocks if affected. */ + for (i = 0; i < n; i++) + { + basic_block dominated, dom_bb, *dom_bbs; + int n_dom_bbs,j; + + bb = bbs[i]; + n_dom_bbs = get_dominated_by (loops->cfg.dom, bb, &dom_bbs); + for (j = 0; j < n_dom_bbs; j++) + { + dominated = dom_bbs[j]; + if (flow_bb_inside_loop_p (loop, dominated)) + continue; + dom_bb = nearest_common_dominator ( + loops->cfg.dom, first_active[i], first_active_latch); + set_immediate_dominator (loops->cfg.dom, dominated, dom_bb); + } + free (dom_bbs); + } + free (first_active); + + free (bbs); + + return true; +} + /* Creates a pre-header for a LOOP. Returns newly created block. Unless CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single entry; otherwise we also force preheader block to have only one successor. - */ + The function also updates dominators stored in DOM. */ static basic_block create_preheader (loop, dom, flags) struct loop *loop; @@ -131,8 +1293,8 @@ create_preheader (loop, dom, flags) return dummy; } -/* Create preheaders for each loop; for meaning of flags see - create_preheader. */ +/* Create preheaders for each loop from loop tree stored in LOOPS; for meaning + of FLAGS see create_preheader. */ void create_preheaders (loops, flags) struct loops *loops; @@ -144,7 +1306,8 @@ create_preheaders (loops, flags) loops->state |= LOOPS_HAVE_PREHEADERS; } -/* Forces all loop latches to have only single successor. */ +/* Forces all loop latches of loops from loop tree LOOPS to have only single + successor. 
*/ void force_single_succ_latches (loops) struct loops *loops; @@ -167,9 +1330,10 @@ force_single_succ_latches (loops) loops->state |= LOOPS_HAVE_SIMPLE_LATCHES; } -/* A quite stupid function to put INSNS on E. They are supposed to form - just one basic block. Jumps out are not handled, so cfg do not have to - be ok after this function. */ +/* A quite stupid function to put INSNS on edge E. They are supposed to form + just one basic block. Jumps in INSNS are not handled, so cfg do not have to + be ok after this function. The created block is placed on correct place + in LOOPS structure and its dominator is set. */ basic_block loop_split_edge_with (e, insns, loops) edge e; diff --git a/gcc/loop-init.c b/gcc/loop-init.c new file mode 100644 index 00000000000..44b9d61d5b8 --- /dev/null +++ b/gcc/loop-init.c @@ -0,0 +1,116 @@ +/* Loop optimizer initialization routines. + Copyright (C) 2002, 2003 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "cfgloop.h" +#include "cfglayout.h" +#include "gcov-io.h" +#include "profile.h" + +/* Initialize loop optimizer. 
*/ + +struct loops * +loop_optimizer_init (dumpfile) + FILE *dumpfile; +{ + struct loops *loops = xcalloc (1, sizeof (struct loops)); + edge e; + + /* Avoid annoying special cases of edges going to exit + block. */ + for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) + if ((e->flags & EDGE_FALLTHRU) && e->src->succ->succ_next) + split_edge (e); + + /* Find the loops. */ + + if (flow_loops_find (loops, LOOP_TREE) <= 1) + { + /* No loops. */ + flow_loops_free (loops); + free (loops); + return NULL; + } + + /* Not going to update these. */ + free (loops->cfg.rc_order); + loops->cfg.rc_order = NULL; + free (loops->cfg.dfs_order); + loops->cfg.dfs_order = NULL; + + /* Initialize structures for layout changes. */ + cfg_layout_initialize (loops); + + /* Create pre-headers. */ + create_preheaders (loops, CP_SIMPLE_PREHEADERS | CP_INSIDE_CFGLAYOUT); + + /* Force all latches to have only single successor. */ + force_single_succ_latches (loops); + + /* Mark irreducible loops. */ + mark_irreducible_loops (loops); + + /* Dump loops. */ + flow_loops_dump (loops, dumpfile, NULL, 1); + +#ifdef ENABLE_CHECKING + verify_dominators (loops->cfg.dom); + verify_loop_structure (loops); +#endif + + return loops; +} + +/* Finalize loop optimizer. */ +void +loop_optimizer_finalize (loops, dumpfile) + struct loops *loops; + FILE *dumpfile; +{ + basic_block bb; + + /* Finalize layout changes. */ + /* Make chain. */ + FOR_EACH_BB (bb) + if (bb->next_bb != EXIT_BLOCK_PTR) + RBI (bb)->next = bb->next_bb; + + /* Another dump. */ + flow_loops_dump (loops, dumpfile, NULL, 1); + + /* Clean up. */ + flow_loops_free (loops); + free (loops); + + /* Finalize changes. */ + cfg_layout_finalize (); + + /* Checking. */ +#ifdef ENABLE_CHECKING + verify_flow_info (); +#endif +} + diff --git a/gcc/loop-unswitch.c b/gcc/loop-unswitch.c new file mode 100644 index 00000000000..8d6654c520e --- /dev/null +++ b/gcc/loop-unswitch.c @@ -0,0 +1,412 @@ +/* Loop unswitching for GNU compiler. 
+ Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "cfgloop.h" +#include "cfglayout.h" +#include "params.h" +#include "output.h" +#include "expr.h" + +/* This pass moves constant conditions out of loops, duplicating the loop + in progres, i.e. this code: + + while (loop_cond) + { + A; + if (cond) + branch1; + else + branch2; + B; + if (cond) + branch3; + C; + } + where nothing inside the loop alters cond is transformed + into + + if (cond) + { + while (loop_cond) + { + A; + branch1; + B; + branch3; + C; + } + } + else + { + while (loop_cond) + { + A; + branch2; + B; + C; + } + } + + Duplicating the loop might lead to code growth exponential in number of + branches inside loop, so we limit the number of unswitchings performed + in a single loop to PARAM_MAX_UNSWITCH_LEVEL. We only perform the + transformation on innermost loops, as the benefit of doing it on loops + containing subloops would not be very large compared to complications + with handling this case. 
*/ + +static struct loop *unswitch_loop PARAMS ((struct loops *, + struct loop *, basic_block)); +static void unswitch_single_loop PARAMS ((struct loops *, struct loop *, + rtx, int)); +static bool may_unswitch_on_p PARAMS ((struct loops *, basic_block, + struct loop *, basic_block *)); +static rtx reversed_condition PARAMS ((rtx)); + +/* Main entry point. Perform loop unswitching on all suitable LOOPS. */ +void +unswitch_loops (loops) + struct loops *loops; +{ + int i, num; + struct loop *loop; + + /* Go through inner loops (only original ones). */ + num = loops->num; + + for (i = 1; i < num; i++) + { + /* Removed loop? */ + loop = loops->parray[i]; + if (!loop) + continue; + + if (loop->inner) + continue; + + unswitch_single_loop (loops, loop, NULL_RTX, 0); +#ifdef ENABLE_CHECKING + verify_dominators (loops->cfg.dom); + verify_loop_structure (loops); +#endif + } +} + +/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its + basic blocks (for what it means see comments below). List of basic blocks + inside LOOP is provided in BODY to save time. */ +static bool +may_unswitch_on_p (loops, bb, loop, body) + struct loops *loops; + basic_block bb; + struct loop *loop; + basic_block *body; +{ + rtx test; + unsigned i; + + /* BB must end in a simple conditional jump. */ + if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next) + return false; + if (!any_condjump_p (bb->end)) + return false; + + /* With branches inside loop. */ + if (!flow_bb_inside_loop_p (loop, bb->succ->dest) + || !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest)) + return false; + + /* It must be executed just once each iteration (because otherwise we + are unable to update dominator/irreducible loop information correctly). */ + if (!just_once_each_iteration_p (loops, loop, bb)) + return false; + + /* Condition must be invariant. We use just a stupid test of invariantness + of the condition: all used regs must not be modified inside loop body. 
*/ + test = get_condition (bb->end, NULL); + if (!test) + return false; + + for (i = 0; i < loop->num_nodes; i++) + if (modified_between_p (test, body[i]->head, NEXT_INSN (body[i]->end))) + return false; + + return true; +} + +/* Reverses CONDition; returns NULL if we cannot. */ +static rtx +reversed_condition (cond) + rtx cond; +{ + enum rtx_code reversed; + reversed = reversed_comparison_code (cond, NULL); + if (reversed == UNKNOWN) + return NULL_RTX; + else + return gen_rtx_fmt_ee (reversed, + GET_MODE (cond), XEXP (cond, 0), + XEXP (cond, 1)); +} + +/* Unswitch single LOOP. COND_CHECKED holds list of conditions we already + unswitched on and are therefore known to be true in this LOOP. NUM is + number of unswitchings done; do not allow it to grow too much, it is too + easy to create example on that the code would grow exponentially. */ +static void +unswitch_single_loop (loops, loop, cond_checked, num) + struct loops *loops; + struct loop *loop; + rtx cond_checked; + int num; +{ + basic_block *bbs, bb; + struct loop *nloop; + unsigned i; + int true_first; + rtx cond, rcond, conds, rconds, acond, split_before; + int always_true; + int always_false; + int repeat; + edge e; + + /* Do not unswitch too much. */ + if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL)) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Not unswitching anymore, hit max level\n"); + return; + } + + /* Only unswitch innermost loops. */ + if (loop->inner) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Not unswitching, not innermost loop\n"); + return; + } + + /* We must be able to duplicate loop body. */ + if (!can_duplicate_loop_p (loop)) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Not unswitching, can't duplicate loop\n"); + return; + } + + /* The loop should not be too large, to limit code growth. 
*/ + if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS)) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Not unswitching, loop too big\n"); + return; + } + + /* Do not unswitch in cold areas. */ + if (!maybe_hot_bb_p (loop->header)) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Not unswitching, not hot area\n"); + return; + } + + /* Nor if the loop usually does not roll. */ + if (expected_loop_iterations (loop) < 1) + { + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Not unswitching, loop iterations < 1\n"); + return; + } + + do + { + repeat = 0; + + /* Find a bb to unswitch on. */ + bbs = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + if (may_unswitch_on_p (loops, bbs[i], loop, bbs)) + break; + + if (i == loop->num_nodes) + { + free (bbs); + return; + } + + if (!(cond = get_condition (bbs[i]->end, &split_before))) + abort (); + rcond = reversed_condition (cond); + + /* Check whether the result can be predicted. */ + always_true = 0; + always_false = 0; + for (acond = cond_checked; acond; acond = XEXP (acond, 1)) + { + if (rtx_equal_p (cond, XEXP (acond, 0))) + { + always_true = 1; + break; + } + if (rtx_equal_p (rcond, XEXP (acond, 0))) + { + always_false = 1; + break; + } + } + + if (always_true) + { + /* Remove false path. */ + for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next); + remove_path (loops, e); + free (bbs); + repeat = 1; + } + else if (always_false) + { + /* Remove true path. */ + for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next); + remove_path (loops, e); + free (bbs); + repeat = 1; + } + } while (repeat); + + /* We found the condition we can unswitch on. */ + conds = alloc_EXPR_LIST (0, cond, cond_checked); + if (rcond) + rconds = alloc_EXPR_LIST (0, rcond, cond_checked); + else + rconds = cond_checked; + + /* Separate condition in a single basic block. 
*/ + bb = split_loop_bb (loops, bbs[i], PREV_INSN (split_before))->dest; + free (bbs); + true_first = !(bb->succ->flags & EDGE_FALLTHRU); + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Unswitching loop\n"); + + /* Unswitch the loop on this condition. */ + nloop = unswitch_loop (loops, loop, bb); + if (!nloop) + abort (); + + /* Invoke itself on modified loops. */ + unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1); + unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1); + + free_EXPR_LIST_node (conds); + if (rcond) + free_EXPR_LIST_node (rconds); +} + +/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support + unswitching of innermost loops. UNSWITCH_ON must be executed in every + iteration, i.e. it must dominate LOOP latch, and should only contain code + for the condition we unswitch on. Returns NULL if impossible, new + loop otherwise. */ +static struct loop * +unswitch_loop (loops, loop, unswitch_on) + struct loops *loops; + struct loop *loop; + basic_block unswitch_on; +{ + edge entry, e, latch_edge; + basic_block switch_bb, unswitch_on_alt, src; + struct loop *nloop; + sbitmap zero_bitmap; + int irred_flag; + + /* Some sanity checking. */ + if (!flow_bb_inside_loop_p (loop, unswitch_on)) + abort (); + if (!unswitch_on->succ || !unswitch_on->succ->succ_next || + unswitch_on->succ->succ_next->succ_next) + abort (); + if (!just_once_each_iteration_p (loops, loop, unswitch_on)) + abort (); + if (loop->inner) + abort (); + if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->dest)) + abort (); + if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest)) + abort (); + + /* Will we be able to perform redirection? */ + if (!any_condjump_p (unswitch_on->end)) + return NULL; + if (!cfg_layout_can_duplicate_bb_p (unswitch_on)) + return NULL; + + entry = loop_preheader_edge (loop); + + /* Make a copy. 
*/ + src = entry->src; + irred_flag = src->flags & BB_IRREDUCIBLE_LOOP; + src->flags &= ~BB_IRREDUCIBLE_LOOP; + zero_bitmap = sbitmap_alloc (2); + sbitmap_zero (zero_bitmap); + if (!duplicate_loop_to_header_edge (loop, entry, loops, 1, + zero_bitmap, NULL, NULL, NULL, 0)) + { + /* Duplication failed: release the bitmap and put back the + irreducible flag cleared above before giving up, so that the + failure path neither leaks nor leaves SRC modified. */ + free (zero_bitmap); + src->flags |= irred_flag; + return NULL; + } + free (zero_bitmap); + src->flags |= irred_flag; + + /* Record the block with condition we unswitch on. */ + unswitch_on_alt = RBI (unswitch_on)->copy; + + /* Make a copy of the block containing the condition; we will use + it as switch to decide which loop we want to use. */ + switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL); + switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP; + switch_bb->flags |= irred_flag; + add_to_dominance_info (loops->cfg.dom, switch_bb); + RBI (unswitch_on)->copy = unswitch_on_alt; + + /* Loopify from the copy of LOOP body, constructing the new loop. */ + for (latch_edge = RBI (loop->latch)->copy->succ; + latch_edge->dest != loop->header; + latch_edge = latch_edge->succ_next); + nloop = loopify (loops, latch_edge, + RBI (loop->header)->copy->pred, switch_bb); + + /* Remove branches that are now unreachable in new loops. We rely on the + fact that cfg_layout_duplicate_bb reverses list of edges. */ + for (e = unswitch_on->succ->succ_next->dest->pred; e; e = e->pred_next) + if (e->src != unswitch_on && + !dominated_by_p (loops->cfg.dom, e->src, e->dest)) + break; + remove_path (loops, unswitch_on->succ); + remove_path (loops, unswitch_on_alt->succ); + + /* One of created loops do not have to be subloop of the outer loop now, + so fix its placement in loop datastructure. */ + fix_loop_placement (loop); + fix_loop_placement (nloop); + + return nloop; +} diff --git a/gcc/params.def b/gcc/params.def index 70963f42e05..998b40d386b 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -151,6 +151,17 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS, "The maximum number of instructions to consider to unroll in a loop", 100) +/* The maximum number of insns of an unswitched loop. 
*/ +DEFPARAM(PARAM_MAX_UNSWITCH_INSNS, + "max-unswitch-insns", + "The maximum number of insns of an unswitched loop", + 50) +/* The maximum level of recursion in unswitch_single_loop. */ +DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL, + "max-unswitch-level", + "The maximum number of unswitchings in a single loop", + 3) + DEFPARAM(HOT_BB_COUNT_FRACTION, "hot-bb-count-fraction", "Select fraction of the maximal count of repetitions of basic block in \ diff --git a/gcc/toplev.c b/gcc/toplev.c index 7b49110d925..840632a0315 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -239,6 +239,7 @@ enum dump_file_index DFI_bp, DFI_ce1, DFI_tracer, + DFI_loop2, DFI_cse2, DFI_life, DFI_combine, @@ -289,6 +290,7 @@ static struct dump_file_info dump_file[DFI_MAX] = { "bp", 'b', 1, 0, 0 }, { "ce1", 'C', 1, 0, 0 }, { "tracer", 'T', 1, 0, 0 }, + { "loop2", 'L', 1, 0, 0 }, { "cse2", 't', 1, 0, 0 }, { "life", 'f', 1, 0, 0 }, /* Yes, duplicate enable switch. */ { "combine", 'c', 1, 0, 0 }, @@ -519,6 +521,9 @@ int flag_unroll_loops; int flag_unroll_all_loops; +/* Nonzero enables loop unswitching. */ +int flag_unswitch_loops; + /* Nonzero enables prefetch optimizations for arrays in loops. */ int flag_prefetch_loop_arrays; @@ -997,6 +1002,8 @@ static const lang_independent_options f_options[] = N_("Perform loop unrolling when iteration count is known") }, {"unroll-all-loops", &flag_unroll_all_loops, 1, N_("Perform loop unrolling for all loops") }, + {"unswitch-loops", &flag_unswitch_loops, 1, + N_("Perform loop unswitching") }, {"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1, N_("Generate prefetch instructions, if available, for arrays in loops") }, {"move-all-movables", &flag_move_all_movables, 1, @@ -3057,6 +3064,38 @@ rest_of_compilation (decl) timevar_pop (TV_TRACER); } + /* Perform loop optimalizations. It might be better to do them a bit + sooner, but we want the profile feedback to work more efficiently. 
*/ + if (optimize > 0 + && flag_unswitch_loops) + { + struct loops *loops; + timevar_push (TV_LOOP); + open_dump_file (DFI_loop2, decl); + if (rtl_dump_file) + dump_flow_info (rtl_dump_file); + + loops = loop_optimizer_init (rtl_dump_file); + + if (loops) + { + /* The optimalizations: */ + if (flag_unswitch_loops) + unswitch_loops (loops); + + loop_optimizer_finalize (loops, rtl_dump_file); + } + + cleanup_cfg (CLEANUP_EXPENSIVE); + delete_trivially_dead_insns (insns, max_reg_num ()); + reg_scan (insns, max_reg_num (), 0); + if (rtl_dump_file) + dump_flow_info (rtl_dump_file); + close_dump_file (DFI_loop2, print_rtl_with_bb, get_insns ()); + timevar_pop (TV_LOOP); + ggc_collect (); + } + if (flag_rerun_cse_after_loop) { timevar_push (TV_CSE2); @@ -4884,6 +4923,7 @@ parse_options_and_default_flags (argc, argv) { flag_inline_functions = 1; flag_rename_registers = 1; + flag_unswitch_loops = 1; } if (optimize < 2 || optimize_size) |