diff options
author | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-05-17 08:10:24 +0000 |
---|---|---|
committer | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-05-17 08:10:24 +0000 |
commit | 7e0311aec8e6ab97a323f24623f2d0da6bb2d779 (patch) | |
tree | 7157094906e2791038cab066565c2c9b9c4e3104 /gcc/tree-ssa-threadupdate.c | |
parent | 0561623326dda191b9fc4f21bfb6f4148a5ddde8 (diff) | |
download | gcc-7e0311aec8e6ab97a323f24623f2d0da6bb2d779.tar.gz |
* tree-vrp.c (finalize_jump_threads): Do not care about dominance info.
(execute_vrp): Preserve loops through jump threading.
* tree-ssa-threadupdate.c (thread_single_edge,
dbds_continue_enumeration_p, determine_bb_domination_status,
thread_through_loop_header): New functions.
(create_block_for_threading,
create_edge_and_update_destination_phis): Set loops for the new blocks.
(prune_undesirable_thread_requests): Removed.
(redirect_edges): Do not pretend that redirect_edge_and_branch can
create new blocks.
(thread_block): Do not call prune_undesirable_thread_requests.
Update loops.
(mark_threaded_blocks): Select edges to thread here.
(thread_through_all_blocks): Take may_peel_loop_headers argument.
Thread edges through loop headers independently.
* cfgloopmanip.c (create_preheader, mfb_keep_just): Export.
* tree-pass.h (TODO_mark_first_instance): New.
(first_pass_instance): Declare.
* cfghooks.c (duplicate_block): Put the block into the original loop
if copy is not specified.
* tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through
jump threading. Pass may_peel_loop_headers to
thread_through_all_blocks according to first_pass_instance.
* cfgloop.h (create_preheader): Declare.
* tree-flow.h (thread_through_all_blocks): Declaration changed.
* basic-block.h (mfb_keep_just, mfb_kj_edge): Declare.
* passes.c (first_pass_instance): New variable.
(next_pass_1): Set TODO_mark_first_instance.
(execute_todo): Set first_pass_instance.
* gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test.
* gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c,
gcc.dg/vect/vect-104.c: Use more complex construction to prevent vectorizing.
* gcc.dg/tree-ssa/pr21559.c: Update outcome.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@124786 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-ssa-threadupdate.c')
-rw-r--r-- | gcc/tree-ssa-threadupdate.c | 676 |
1 files changed, 429 insertions, 247 deletions
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c index c6b52095716..22266bd355c 100644 --- a/gcc/tree-ssa-threadupdate.c +++ b/gcc/tree-ssa-threadupdate.c @@ -315,6 +315,7 @@ create_edge_and_update_destination_phis (struct redirection_data *rd) e->probability = REG_BR_PROB_BASE; e->count = rd->dup_block->count; + e->aux = rd->outgoing_edge->aux; /* If there are any PHI nodes at the destination of the outgoing edge from the duplicate block, then we will need to add a new argument @@ -385,199 +386,6 @@ fixup_template_block (void **slot, void *data) return 1; } -/* Not all jump threading requests are useful. In particular some - jump threading requests can create irreducible regions which are - undesirable. - - This routine will examine the BB's incoming edges for jump threading - requests which, if acted upon, would create irreducible regions. Any - such jump threading requests found will be pruned away. */ - -static void -prune_undesirable_thread_requests (basic_block bb) -{ - edge e; - edge_iterator ei; - bool may_create_irreducible_region = false; - unsigned int num_outgoing_edges_into_loop = 0; - - /* For the heuristics below, we need to know if BB has more than - one outgoing edge into a loop. */ - FOR_EACH_EDGE (e, ei, bb->succs) - num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0); - - if (num_outgoing_edges_into_loop > 1) - { - edge backedge = NULL; - - /* Consider the effect of threading the edge (0, 1) to 2 on the left - CFG to produce the right CFG: - - - 0 0 - | | - 1<--+ 2<--------+ - / \ | | | - 2 3 | 4<----+ | - \ / | / \ | | - 4---+ E 1-- | --+ - | | | - E 3---+ - - - Threading the (0, 1) edge to 2 effectively creates two loops - (2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested. - This is not good. - - However, we do need to be able to thread (0, 1) to 2 or 3 - in the left CFG below (which creates the middle and right - CFGs with nested loops). 
- - 0 0 0 - | | | - 1<--+ 2<----+ 3<-+<-+ - /| | | | | | | - 2 | | 3<-+ | 1--+ | - \| | | | | | | - 3---+ 1--+--+ 2-----+ - - - A safe heuristic appears to be to only allow threading if BB - has a single incoming backedge from one of its direct successors. */ - - FOR_EACH_EDGE (e, ei, bb->preds) - { - if (e->flags & EDGE_DFS_BACK) - { - if (backedge) - { - backedge = NULL; - break; - } - else - { - backedge = e; - } - } - } - - if (backedge && find_edge (bb, backedge->src)) - ; - else - may_create_irreducible_region = true; - } - else - { - edge dest = NULL; - - /* If we thread across the loop entry block (BB) into the - loop and BB is still reached from outside the loop, then - we would create an irreducible CFG. Consider the effect - of threading the edge (1, 4) to 5 on the left CFG to produce - the right CFG - - 0 0 - / \ / \ - 1 2 1 2 - \ / | | - 4<----+ 5<->4 - / \ | | - E 5---+ E - - - Threading the (1, 4) edge to 5 creates two entry points - into the loop (4, 5) (one from block 1, the other from - block 2). A classic irreducible region. - - So look at all of BB's incoming edges which are not - backedges and which are not threaded to the loop exit. - If that subset of incoming edges do not all thread - to the same block, then threading any of them will create - an irreducible region. */ - - FOR_EACH_EDGE (e, ei, bb->preds) - { - edge e2; - - /* We ignore back edges for now. This may need refinement - as threading a backedge creates an inner loop which - we would need to verify has a single entry point. - - If all backedges thread to new locations, then this - block will no longer have incoming backedges and we - need not worry about creating irreducible regions - by threading through BB. I don't think this happens - enough in practice to worry about it. */ - if (e->flags & EDGE_DFS_BACK) - continue; - - /* If the incoming edge threads to the loop exit, then it - is clearly safe. 
*/ - e2 = e->aux; - if (e2 && (e2->flags & EDGE_LOOP_EXIT)) - continue; - - /* E enters the loop header and is not threaded. We can - not allow any other incoming edges to thread into - the loop as that would create an irreducible region. */ - if (!e2) - { - may_create_irreducible_region = true; - break; - } - - /* We know that this incoming edge threads to a block inside - the loop. This edge must thread to the same target in - the loop as any previously seen threaded edges. Otherwise - we will create an irreducible region. */ - if (!dest) - dest = e2; - else if (e2 != dest) - { - may_create_irreducible_region = true; - break; - } - } - } - - /* If we might create an irreducible region, then cancel any of - the jump threading requests for incoming edges which are - not backedges and which do not thread to the exit block. */ - if (may_create_irreducible_region) - { - FOR_EACH_EDGE (e, ei, bb->preds) - { - edge e2; - - /* Ignore back edges. */ - if (e->flags & EDGE_DFS_BACK) - continue; - - e2 = e->aux; - - /* If this incoming edge was not threaded, then there is - nothing to do. */ - if (!e2) - continue; - - /* If this incoming edge threaded to the loop exit, - then it can be ignored as it is safe. */ - if (e2->flags & EDGE_LOOP_EXIT) - continue; - - if (e2) - { - /* This edge threaded into the loop and the jump thread - request must be cancelled. */ - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Not threading jump %d --> %d to %d\n", - e->src->index, e->dest->index, e2->dest->index); - e->aux = NULL; - } - } - } -} - /* Hash table traversal callback to redirect each incoming edge associated with this hash table element to its new destination. */ @@ -620,11 +428,8 @@ redirect_edges (void **slot, void *data) /* Redirect the incoming edge to the appropriate duplicate block. 
*/ e2 = redirect_edge_and_branch (e, rd->dup_block); + gcc_assert (e == e2); flush_pending_stmts (e2); - - if ((dump_file && (dump_flags & TDF_DETAILS)) - && e->src != e2->src) - fprintf (dump_file, " basic block %d created\n", e2->src->index); } else { @@ -696,46 +501,23 @@ redirection_block_p (basic_block bb) successor of BB. We then revector the incoming edges into BB to the appropriate duplicate of BB. - BB and its duplicates will have assignments to the same set of - SSA_NAMEs. Right now, we just call into update_ssa to update the - SSA graph for those names. - - We are also going to experiment with a true incremental update - scheme for the duplicated resources. One of the interesting - properties we can exploit here is that all the resources set - in BB will have the same IDFS, so we have one IDFS computation - per block with incoming threaded edges, which can lower the - cost of the true incremental update algorithm. */ + If NOLOOP_ONLY is true, we only perform the threading as long as it + does not affect the structure of the loops in a nontrivial way. */ static bool -thread_block (basic_block bb) +thread_block (basic_block bb, bool noloop_only) { /* E is an incoming edge into BB that we may or may not want to redirect to a duplicate of BB. */ - edge e; + edge e, e2; edge_iterator ei; struct local_info local_info; - - /* FOUND_BACKEDGE indicates that we found an incoming backedge - into BB, in which case we may ignore certain jump threads - to avoid creating irreducible regions. */ - bool found_backedge = false; + struct loop *loop = bb->loop_father; /* ALL indicates whether or not all incoming edges into BB should be threaded to a duplicate of BB. */ bool all = true; - /* If optimizing for size, only thread this block if we don't have - to duplicate it or it's an otherwise empty redirection block. 
*/ - if (optimize_size - && EDGE_COUNT (bb->preds) > 1 - && !redirection_block_p (bb)) - { - FOR_EACH_EDGE (e, ei, bb->preds) - e->aux = NULL; - return false; - } - /* To avoid scanning a linear array for the element we need we instead use a hash table. For normal code there should be no noticeable difference. However, if we have a block with a large number of @@ -745,35 +527,45 @@ thread_block (basic_block bb) redirection_data_eq, free); - FOR_EACH_EDGE (e, ei, bb->preds) - found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0); + /* If we thread the latch of the loop to its exit, the loop ceases to + exist. Make sure we do not restrict ourselves in order to preserve + this loop. */ + if (current_loops && loop->header == bb) + { + e = loop_latch_edge (loop); + e2 = e->aux; - /* If BB has incoming backedges, then threading across BB might - introduce an irreducible region, which would be undesirable - as that inhibits various optimizations later. Prune away - any jump threading requests which we know will result in - an irreducible region. */ - if (found_backedge) - prune_undesirable_thread_requests (bb); + if (e2 && loop_exit_edge_p (loop, e2)) + { + loop->header = NULL; + loop->latch = NULL; + } + } /* Record each unique threaded destination into a hash table for efficient lookups. */ FOR_EACH_EDGE (e, ei, bb->preds) { - if (!e->aux) + e2 = e->aux; + + if (!e2 + /* If NOLOOP_ONLY is true, we only allow threading through the + header of a loop to exit edges. */ + || (noloop_only + && current_loops + && bb == bb->loop_father->header + && !loop_exit_edge_p (bb->loop_father, e2))) { all = false; + continue; } - else - { - edge e2 = e->aux; - update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e), - e->count, e->aux); - /* Insert the outgoing edge into the hash table if it is not - already in the hash table. 
*/ - lookup_redirection_data (e2, e, INSERT); - } + update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e), + e->count, e->aux); + + /* Insert the outgoing edge into the hash table if it is not + already in the hash table. */ + lookup_redirection_data (e2, e, INSERT); } /* If we are going to thread all incoming edges to an outgoing edge, then @@ -821,6 +613,339 @@ thread_block (basic_block bb) return local_info.jumps_threaded; } +/* Threads edge E through E->dest to the edge E->aux. Returns the copy + of E->dest created during threading, or E->dest if it was not necessary + to copy it (E is its single predecessor). */ + +static basic_block +thread_single_edge (edge e) +{ + basic_block bb = e->dest; + edge eto = e->aux; + struct redirection_data rd; + struct local_info local_info; + + e->aux = NULL; + + thread_stats.num_threaded_edges++; + + if (single_pred_p (bb)) + { + /* If BB has just a single predecessor, we should only remove the + control statements at its end, and successors except for ETO. */ + remove_ctrl_stmt_and_useless_edges (bb, eto->dest); + return bb; + } + + /* Otherwise, we need to create a copy. */ + update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto); + + local_info.bb = bb; + rd.outgoing_edge = eto; + + create_block_for_threading (bb, &rd); + create_edge_and_update_destination_phis (&rd); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Threaded jump %d --> %d to %d\n", + e->src->index, e->dest->index, rd.dup_block->index); + + rd.dup_block->count = e->count; + rd.dup_block->frequency = EDGE_FREQUENCY (e); + single_succ_edge (rd.dup_block)->count = e->count; + redirect_edge_and_branch (e, rd.dup_block); + flush_pending_stmts (e); + + return rd.dup_block; +} + +/* Callback for dfs_enumerate_from. Returns true if BB is different + from STOP and DBDS_CE_STOP. 
*/ + +static basic_block dbds_ce_stop; +static bool +dbds_continue_enumeration_p (basic_block bb, void *stop) +{ + return (bb != (basic_block) stop + && bb != dbds_ce_stop); +} + +/* Evaluates the dominance relationship of latch of the LOOP and BB, and + returns the state. */ + +enum bb_dom_status +{ + /* BB does not dominate latch of the LOOP. */ + DOMST_NONDOMINATING, + /* The LOOP is broken (there is no path from the header to its latch. */ + DOMST_LOOP_BROKEN, + /* BB dominates the latch of the LOOP. */ + DOMST_DOMINATING +}; + +static enum bb_dom_status +determine_bb_domination_status (struct loop *loop, basic_block bb) +{ + basic_block *bblocks; + unsigned nblocks, i; + bool bb_reachable = false; + edge_iterator ei; + edge e; + +#ifdef ENABLE_CHECKING + /* This function assumes BB is a successor of LOOP->header. */ + { + bool ok = false; + + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (e->src == loop->header) + { + ok = true; + break; + } + } + + gcc_assert (ok); + } +#endif + + if (bb == loop->latch) + return DOMST_DOMINATING; + + /* Check that BB dominates LOOP->latch, and that it is back-reachable + from it. */ + + bblocks = XCNEWVEC (basic_block, loop->num_nodes); + dbds_ce_stop = loop->header; + nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p, + bblocks, loop->num_nodes, bb); + for (i = 0; i < nblocks; i++) + FOR_EACH_EDGE (e, ei, bblocks[i]->preds) + { + if (e->src == loop->header) + { + free (bblocks); + return DOMST_NONDOMINATING; + } + if (e->src == bb) + bb_reachable = true; + } + + free (bblocks); + return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN); +} + +/* Thread jumps through the header of LOOP. Returns true if cfg changes. + If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges + to the inside of the loop. 
*/ + +static bool +thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers) +{ + basic_block header = loop->header; + edge e, tgt_edge, latch = loop_latch_edge (loop); + edge_iterator ei; + basic_block tgt_bb, atgt_bb; + enum bb_dom_status domst; + + /* We have already threaded through headers to exits, so all the threading + requests now are to the inside of the loop. We need to avoid creating + irreducible regions (i.e., loops with more than one entry block), and + also loop with several latch edges, or new subloops of the loop (although + there are cases where it might be appropriate, it is difficult to decide, + and doing it wrongly may confuse other optimizers). + + We could handle more general cases here. However, the intention is to + preserve some information about the loop, which is impossible if its + structure changes significantly, in a way that is not well understood. + Thus we only handle few important special cases, in which also updating + of the loop-carried information should be feasible: + + 1) Propagation of latch edge to a block that dominates the latch block + of a loop. This aims to handle the following idiom: + + first = 1; + while (1) + { + if (first) + initialize; + first = 0; + body; + } + + After threading the latch edge, this becomes + + first = 1; + if (first) + initialize; + while (1) + { + first = 0; + body; + } + + The original header of the loop is moved out of it, and we may thread + the remaining edges through it without further constraints. + + 2) All entry edges are propagated to a single basic block that dominates + the latch block of the loop. This aims to handle the following idiom + (normally created for "for" loops): + + i = 0; + while (1) + { + if (i >= 100) + break; + body; + i++; + } + + This becomes + + i = 0; + while (1) + { + body; + i++; + if (i >= 100) + break; + } + */ + + /* Threading through the header won't improve the code if the header has just + one successor. 
*/ + if (single_succ_p (header)) + goto fail; + + if (latch->aux) + { + tgt_edge = latch->aux; + tgt_bb = tgt_edge->dest; + } + else if (!may_peel_loop_headers + && !redirection_block_p (loop->header)) + goto fail; + else + { + tgt_bb = NULL; + tgt_edge = NULL; + FOR_EACH_EDGE (e, ei, header->preds) + { + if (!e->aux) + { + if (e == latch) + continue; + + /* If latch is not threaded, and there is a header + edge that is not threaded, we would create loop + with multiple entries. */ + goto fail; + } + + tgt_edge = e->aux; + atgt_bb = tgt_edge->dest; + if (!tgt_bb) + tgt_bb = atgt_bb; + /* Two targets of threading would make us create loop + with multiple entries. */ + else if (tgt_bb != atgt_bb) + goto fail; + } + + if (!tgt_bb) + { + /* There are no threading requests. */ + return false; + } + + /* Redirecting to empty loop latch is useless. */ + if (tgt_bb == loop->latch + && empty_block_p (loop->latch)) + goto fail; + } + + /* The target block must dominate the loop latch, otherwise we would be + creating a subloop. */ + domst = determine_bb_domination_status (loop, tgt_bb); + if (domst == DOMST_NONDOMINATING) + goto fail; + if (domst == DOMST_LOOP_BROKEN) + { + /* If the loop ceased to exist, mark it as such, and thread through its + original header. */ + loop->header = NULL; + loop->latch = NULL; + return thread_block (header, false); + } + + if (tgt_bb->loop_father->header == tgt_bb) + { + /* If the target of the threading is a header of a subloop, we need + to create a preheader for it, so that the headers of the two loops + do not merge. */ + if (EDGE_COUNT (tgt_bb->preds) > 2) + { + tgt_bb = create_preheader (tgt_bb->loop_father, 0); + gcc_assert (tgt_bb != NULL); + } + else + tgt_bb = split_edge (tgt_edge); + } + + if (latch->aux) + { + /* First handle the case latch edge is redirected. 
*/ + loop->latch = thread_single_edge (latch); + gcc_assert (single_succ (loop->latch) == tgt_bb); + loop->header = tgt_bb; + + /* Thread the remaining edges through the former header. */ + thread_block (header, false); + } + else + { + basic_block new_preheader; + + /* Now consider the case entry edges are redirected to the new entry + block. Remember one entry edge, so that we can find the new + preheader (its destination after threading). */ + FOR_EACH_EDGE (e, ei, header->preds) + { + if (e->aux) + break; + } + + /* The duplicate of the header is the new preheader of the loop. Ensure + that it is placed correctly in the loop hierarchy. */ + loop->copy = loop_outer (loop); + + thread_block (header, false); + loop->copy = NULL; + new_preheader = e->dest; + + /* Create the new latch block. This is always necessary, as the latch + must have only a single successor, but the original header had at + least two successors. */ + loop->latch = NULL; + mfb_kj_edge = single_succ_edge (new_preheader); + loop->header = mfb_kj_edge->dest; + latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL); + loop->header = latch->dest; + loop->latch = latch->src; + } + + return true; + +fail: + /* We failed to thread anything. Cancel the requests. */ + FOR_EACH_EDGE (e, ei, header->preds) + { + e->aux = NULL; + } + return false; +} + /* Walk through the registered jump threads and convert them into a form convenient for this pass. 
@@ -838,6 +963,11 @@ static void mark_threaded_blocks (bitmap threaded_blocks) { unsigned int i; + bitmap_iterator bi; + bitmap tmp = BITMAP_ALLOC (NULL); + basic_block bb; + edge e; + edge_iterator ei; for (i = 0; i < VEC_length (edge, threaded_edges); i += 2) { @@ -845,8 +975,30 @@ mark_threaded_blocks (bitmap threaded_blocks) edge e2 = VEC_index (edge, threaded_edges, i + 1); e->aux = e2; - bitmap_set_bit (threaded_blocks, e->dest->index); + bitmap_set_bit (tmp, e->dest->index); + } + + /* If optimizing for size, only thread through block if we don't have + to duplicate it or it's an otherwise empty redirection block. */ + if (optimize_size) + { + EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) + { + bb = BASIC_BLOCK (i); + if (EDGE_COUNT (bb->preds) > 1 + && !redirection_block_p (bb)) + { + FOR_EACH_EDGE (e, ei, bb->preds) + e->aux = NULL; + } + else + bitmap_set_bit (threaded_blocks, i); + } } + else + bitmap_copy (threaded_blocks, tmp); + + BITMAP_FREE(tmp); } @@ -856,15 +1008,20 @@ mark_threaded_blocks (bitmap threaded_blocks) It is the caller's responsibility to fix the dominance information and rewrite duplicated SSA_NAMEs back into SSA form. + If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through + loop headers if it does not simplify the loop. + Returns true if one or more edges were threaded, false otherwise. */ bool -thread_through_all_blocks (void) +thread_through_all_blocks (bool may_peel_loop_headers) { bool retval = false; unsigned int i; bitmap_iterator bi; bitmap threaded_blocks; + struct loop *loop; + loop_iterator li; if (threaded_edges == NULL) return false; @@ -874,14 +1031,38 @@ thread_through_all_blocks (void) mark_threaded_blocks (threaded_blocks); + if (current_loops) + FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) + loop->copy = NULL; + + /* First perform the threading requests that do not affect + loop structure. 
*/ EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi) { basic_block bb = BASIC_BLOCK (i); if (EDGE_COUNT (bb->preds) > 0) - retval |= thread_block (bb); + retval |= thread_block (bb, true); + } + + /* Then perform the threading through loop headers. We start with the + innermost loop, so that the changes in cfg we perform won't affect + further threading. */ + if (current_loops) + { + FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) + { + if (!loop->header + || !bitmap_bit_p (threaded_blocks, loop->header->index)) + continue; + + retval |= thread_through_loop_header (loop, may_peel_loop_headers); + } } + if (retval) + free_dominance_info (CDI_DOMINATORS); + if (dump_file && (dump_flags & TDF_STATS)) fprintf (dump_file, "\nJumps threaded: %lu\n", thread_stats.num_threaded_edges); @@ -890,6 +1071,7 @@ thread_through_all_blocks (void) threaded_blocks = NULL; VEC_free (edge, heap, threaded_edges); threaded_edges = NULL; + return retval; } |