diff options
Diffstat (limited to 'gcc/tree-ssa-threadupdate.c')
-rw-r--r-- | gcc/tree-ssa-threadupdate.c | 354 |
1 files changed, 250 insertions, 104 deletions
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c index e819d65e030..777fe41033b 100644 --- a/gcc/tree-ssa-threadupdate.c +++ b/gcc/tree-ssa-threadupdate.c @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-phinodes.h" #include "tree-ssa.h" #include "tree-ssa-threadupdate.h" +#include "ssa-iterators.h" #include "dumpfile.h" #include "cfgloop.h" #include "hash-table.h" @@ -73,19 +74,16 @@ along with GCC; see the file COPYING3. If not see set of unique destination blocks that the incoming edges should be threaded to. - Block duplication can be further minimized by using B instead of - creating B' for one destination if all edges into B are going to be - threaded to a successor of B. We had code to do this at one time, but - I'm not convinced it is correct with the changes to avoid mucking up - the loop structure (which may cancel threading requests, thus a block - which we thought was going to become unreachable may still be reachable). - This code was also going to get ugly with the introduction of the ability - for a single jump thread request to bypass multiple blocks. + We reduce the number of edges and statements we create by not copying all + the outgoing edges and the control statement in step #1. We instead create + a template block without the outgoing edges and duplicate the template. - We further reduce the number of edges and statements we create by - not copying all the outgoing edges and the control statement in - step #1. We instead create a template block without the outgoing - edges and duplicate the template. */ + Another case this code handles is threading through a "joiner" block. In + this case, we do not know the destination of the joiner block, but one + of the outgoing edges from the joiner block leads to a threadable path. This + case largely works as outlined above, except the duplicate of the joiner + block still contains a full set of outgoing edges and its control statement. + We just redirect one of its outgoing edges to our jump threading path. */ /* Steps #5 and #6 of the above algorithm are best implemented by walking @@ -115,9 +113,20 @@ struct el struct redirection_data : typed_free_remove<redirection_data> { - /* A duplicate of B with the trailing control statement removed and which - targets a single successor of B. */ - basic_block dup_block; + /* We support wiring up two block duplicates in a jump threading path. + + One is a normal block copy where we remove the control statement + and wire up its single remaining outgoing edge to the thread path. + + The other is a joiner block where we leave the control statement + in place, but wire one of the outgoing edges to a thread path. + + In theory we could have multiple block duplicates in a jump + threading path, but I haven't tried that. + + The duplicate blocks appear in this array in the same order in + which they appear in the jump thread path. */ + basic_block dup_blocks[2]; /* The jump threading path. */ vec<jump_thread_edge *> *path; @@ -171,8 +180,11 @@ struct ssa_local_info_t /* The current block we are working on. */ basic_block bb; - /* A template copy of BB with no outgoing edges or control statement that - we use for creating copies. */ + /* We only create a template block for the first duplicated block in a + jump threading path as we may need many duplicates of that block. + + The second duplicate block in a path is specific to that path. Creating + and sharing a template for that block is considerably more difficult. */ basic_block template_block; /* TRUE if we thread one or more jumps, FALSE otherwise. */ @@ -234,24 +246,27 @@ remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb) } } -/* Create a duplicate of BB. Record the duplicate block in RD. */ +/* Create a duplicate of BB. Record the duplicate block in an array + indexed by COUNT stored in RD. */ static void -create_block_for_threading (basic_block bb, struct redirection_data *rd) +create_block_for_threading (basic_block bb, + struct redirection_data *rd, + unsigned int count) { edge_iterator ei; edge e; /* We can use the generic block duplication code and simply remove the stuff we do not need. */ - rd->dup_block = duplicate_block (bb, NULL, NULL); + rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL); - FOR_EACH_EDGE (e, ei, rd->dup_block->succs) + FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs) e->aux = NULL; /* Zero out the profile, since the block is unreachable for now. */ - rd->dup_block->frequency = 0; - rd->dup_block->count = 0; + rd->dup_blocks[count]->frequency = 0; + rd->dup_blocks[count]->count = 0; } /* Main data structure to hold information for duplicates of BB. */ @@ -275,7 +290,8 @@ lookup_redirection_data (edge e, enum insert_option insert) in the table. */ elt = XNEW (struct redirection_data); elt->path = path; - elt->dup_block = NULL; + elt->dup_blocks[0] = NULL; + elt->dup_blocks[1] = NULL; elt->incoming_edges = NULL; slot = redirection_data.find_slot (elt, insert); @@ -312,7 +328,7 @@ lookup_redirection_data (edge e, enum insert_option insert) to the list of incoming edges associated with E. */ if (insert) { - struct el *el = XNEW (struct el); + struct el *el = XNEW (struct el); el->next = elt->incoming_edges; el->e = e; elt->incoming_edges = el; @@ -322,6 +338,31 @@ lookup_redirection_data (edge e, enum insert_option insert) } } +/* Similar to copy_phi_args, except that the PHI arg exists, it just + does not have a value associated with it. */ + +static void +copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e) +{ + int src_idx = src_e->dest_idx; + int tgt_idx = tgt_e->dest_idx; + + /* Iterate over each PHI in e->dest. */ + for (gimple_stmt_iterator gsi = gsi_start_phis (src_e->dest), + gsi2 = gsi_start_phis (tgt_e->dest); + !gsi_end_p (gsi); + gsi_next (&gsi), gsi_next (&gsi2)) + { + gimple src_phi = gsi_stmt (gsi); + gimple dest_phi = gsi_stmt (gsi2); + tree val = gimple_phi_arg_def (src_phi, src_idx); + source_location locus = gimple_phi_arg_location (src_phi, src_idx); + + SET_PHI_ARG_DEF (dest_phi, tgt_idx, val); + gimple_phi_arg_set_location (dest_phi, tgt_idx, locus); + } +} + /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E. */ static void @@ -389,7 +430,7 @@ create_edge_and_update_destination_phis (struct redirection_data *rd, = new jump_thread_edge ((*path)[i]->e, (*path)[i]->type); copy->safe_push (x); } - e->aux = (void *)copy; + e->aux = (void *)copy; } else { @@ -403,7 +444,23 @@ create_edge_and_update_destination_phis (struct redirection_data *rd, copy_phi_args (e->dest, rd->path->last ()->e, e); } -/* Wire up the outgoing edges from the duplicate block and +/* Look through PATH beginning at START and return TRUE if there are + any additional blocks that need to be duplicated. Otherwise, + return FALSE. */ +static bool +any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path, + unsigned int start) +{ + for (unsigned int i = start + 1; i < path->length (); i++) + { + if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK + || (*path)[i]->type == EDGE_COPY_SRC_BLOCK) + return true; + } + return false; +} + +/* Wire up the outgoing edges from the duplicate blocks and update any PHIs as needed. */ void ssa_fix_duplicate_block_edges (struct redirection_data *rd, @@ -412,37 +469,77 @@ ssa_fix_duplicate_block_edges (struct redirection_data *rd, edge e = rd->incoming_edges->e; vec<jump_thread_edge *> *path = THREAD_PATH (e); - /* If we were threading through an joiner block, then we want - to keep its control statement and redirect an outgoing edge. - Else we want to remove the control statement & edges, then create - a new outgoing edge. In both cases we may need to update PHIs. */ - if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) - { - edge victim; - edge e2; - - /* This updates the PHIs at the destination of the duplicate - block. */ - update_destination_phis (local_info->bb, rd->dup_block); - - /* Find the edge from the duplicate block to the block we're - threading through. That's the edge we want to redirect. */ - victim = find_edge (rd->dup_block, (*path)[1]->e->dest); - e2 = redirect_edge_and_branch (victim, path->last ()->e->dest); - e2->count = path->last ()->e->count; - - /* If we redirected the edge, then we need to copy PHI arguments - at the target. If the edge already existed (e2 != victim case), - then the PHIs in the target already have the correct arguments. */ - if (e2 == victim) - copy_phi_args (e2->dest, path->last ()->e, e2); - } - else + for (unsigned int count = 0, i = 1; i < path->length (); i++) { - remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL); - create_edge_and_update_destination_phis (rd, rd->dup_block); + /* If we were threading through an joiner block, then we want + to keep its control statement and redirect an outgoing edge. + Else we want to remove the control statement & edges, then create + a new outgoing edge. In both cases we may need to update PHIs. */ + if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) + { + edge victim; + edge e2; + + /* This updates the PHIs at the destination of the duplicate + block. */ + update_destination_phis (local_info->bb, rd->dup_blocks[count]); + + /* Find the edge from the duplicate block to the block we're + threading through. That's the edge we want to redirect. */ + victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest); + + /* If there are no remaining blocks on the path to duplicate, + then redirect VICTIM to the final destination of the jump + threading path. */ + if (!any_remaining_duplicated_blocks (path, i)) + { + e2 = redirect_edge_and_branch (victim, path->last ()->e->dest); + e2->count = path->last ()->e->count; + /* If we redirected the edge, then we need to copy PHI arguments + at the target. If the edge already existed (e2 != victim + case), then the PHIs in the target already have the correct + arguments. */ + if (e2 == victim) + copy_phi_args (e2->dest, path->last ()->e, e2); + } + else + { + /* Redirect VICTIM to the next duplicated block in the path. */ + e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]); + + /* We need to update the PHIs in the next duplicated block. We + want the new PHI args to have the same value as they had + in the source of the next duplicate block. + + Thus, we need to know which edge we traversed into the + source of the duplicate. Furthermore, we may have + traversed many edges to reach the source of the duplicate. + + Walk through the path starting at element I until we + hit an edge marked with EDGE_COPY_SRC_BLOCK. We want + the edge from the prior element. */ + for (unsigned int j = i + 1; j < path->length (); j++) + { + if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK) + { + copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2); + break; + } + } + } + count++; + } + else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK) + { + remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL); + create_edge_and_update_destination_phis (rd, rd->dup_blocks[count]); + if (count == 1) + single_succ_edge (rd->dup_blocks[1])->aux = NULL; + count++; + } } } + /* Hash table traversal callback routine to create duplicate blocks. */ int @@ -451,12 +548,32 @@ ssa_create_duplicates (struct redirection_data **slot, { struct redirection_data *rd = *slot; + /* The second duplicated block in a jump threading path is specific + to the path. So it gets stored in RD rather than in LOCAL_DATA. + + Each time we're called, we have to look through the path and see + if a second block needs to be duplicated. + + Note the search starts with the third edge on the path. The first + edge is the incoming edge, the second edge always has its source + duplicated. Thus we start our search with the third edge. */ + vec<jump_thread_edge *> *path = rd->path; + for (unsigned int i = 2; i < path->length (); i++) + { + if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK + || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) + { + create_block_for_threading ((*path)[i]->e->src, rd, 1); + break; + } + } + /* Create a template block if we have not done so already. Otherwise use the template to create a new block. */ if (local_info->template_block == NULL) { - create_block_for_threading (local_info->bb, rd); - local_info->template_block = rd->dup_block; + create_block_for_threading ((*path)[1]->e->src, rd, 0); + local_info->template_block = rd->dup_blocks[0]; /* We do not create any outgoing edges for the template. We will take care of that in a later traversal. That way we do not @@ -464,7 +581,7 @@ ssa_create_duplicates (struct redirection_data **slot, } else { - create_block_for_threading (local_info->template_block, rd); + create_block_for_threading (local_info->template_block, rd, 0); /* Go ahead and wire up outgoing edges and update PHIs for the duplicate block. */ @@ -492,7 +609,7 @@ ssa_fixup_template_block (struct redirection_data **slot, to keep its control statement and redirect an outgoing edge. Else we want to remove the control statement & edges, then create a new outgoing edge. In both cases we may need to update PHIs. */ - if (rd->dup_block && rd->dup_block == local_info->template_block) + if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block) { ssa_fix_duplicate_block_edges (rd, local_info); return 0; @@ -526,36 +643,36 @@ ssa_redirect_edges (struct redirection_data **slot, thread_stats.num_threaded_edges++; - if (rd->dup_block) + if (rd->dup_blocks[0]) { edge e2; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Threaded jump %d --> %d to %d\n", - e->src->index, e->dest->index, rd->dup_block->index); + e->src->index, e->dest->index, rd->dup_blocks[0]->index); - rd->dup_block->count += e->count; + rd->dup_blocks[0]->count += e->count; /* Excessive jump threading may make frequencies large enough so the computation overflows. */ - if (rd->dup_block->frequency < BB_FREQ_MAX * 2) - rd->dup_block->frequency += EDGE_FREQUENCY (e); + if (rd->dup_blocks[0]->frequency < BB_FREQ_MAX * 2) + rd->dup_blocks[0]->frequency += EDGE_FREQUENCY (e); /* In the case of threading through a joiner block, the outgoing edges from the duplicate block were updated when they were redirected during ssa_fix_duplicate_block_edges. */ if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK) - EDGE_SUCC (rd->dup_block, 0)->count += e->count; + EDGE_SUCC (rd->dup_blocks[0], 0)->count += e->count; /* Redirect the incoming edge (possibly to the joiner block) to the appropriate duplicate block. */ - e2 = redirect_edge_and_branch (e, rd->dup_block); + e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]); gcc_assert (e == e2); flush_pending_stmts (e2); } /* Go ahead and clear E->aux. It's not needed anymore and failure - to clear it will cause all kinds of unpleasant problems later. */ + to clear it will cause all kinds of unpleasant problems later. */ delete_jump_thread_path (path); e->aux = NULL; @@ -580,9 +697,9 @@ redirection_block_p (basic_block bb) /* Advance to the first executable statement. */ gsi = gsi_start_bb (bb); while (!gsi_end_p (gsi) - && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL + && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL || is_gimple_debug (gsi_stmt (gsi)) - || gimple_nop_p (gsi_stmt (gsi)))) + || gimple_nop_p (gsi_stmt (gsi)))) gsi_next (&gsi); /* Check if this is an empty block. */ @@ -591,9 +708,9 @@ redirection_block_p (basic_block bb) /* Test that we've reached the terminating control statement. */ return gsi_stmt (gsi) - && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND - || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO - || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH); + && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND + || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO + || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH); } /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB @@ -615,7 +732,7 @@ redirection_block_p (basic_block bb) the appropriate duplicate of BB. If NOLOOP_ONLY is true, we only perform the threading as long as it - does not affect the structure of the loops in a nontrivial way. + does not affect the structure of the loops in a nontrivial way. If JOINERS is true, then thread through joiner blocks as well. */ @@ -678,22 +795,12 @@ thread_block_1 (basic_block bb, bool noloop_only, bool joiners) if (!e2 || noloop_only) { /* If NOLOOP_ONLY is true, we only allow threading through the - header of a loop to exit edges. - - There are two cases to consider. The first when BB is the - loop header. We will attempt to thread this elsewhere, so - we can just continue here. */ - - if (bb == bb->loop_father->header - && (!loop_exit_edge_p (bb->loop_father, e2) - || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)) - continue; - + header of a loop to exit edges. */ - /* The second occurs when there was loop header buried in a jump - threading path. We do not try and thread this elsewhere, so - just cancel the jump threading request by clearing the AUX - field now. */ + /* One case occurs when there was loop header buried in a jump + threading path that crosses loop boundaries. We do not try + and thread this elsewhere, so just cancel the jump threading + request by clearing the AUX field now. */ if ((bb->loop_father != e2->src->loop_father && !loop_exit_edge_p (e2->src->loop_father, e2)) || (e2->src->loop_father != e2->dest->loop_father @@ -706,11 +813,40 @@ thread_block_1 (basic_block bb, bool noloop_only, bool joiners) e->aux = NULL; continue; } + + /* Another case occurs when trying to thread through our + own loop header, possibly from inside the loop. + + If our loop header is buried in the path, then go ahead + and cancel the jump threading request here. This likely + will need updating for the FSA/FSM coremark case. + + Other cases (BB is the loop header) are handled elsewhere. */ + unsigned int i; + for (i = 1; i < path->length (); i++) + { + if ((*path)[i]->e->src == bb->loop_father->header + && (!loop_exit_edge_p (bb->loop_father, e2) + || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)) + { + /* If i != 1, then it's a buried header that will not + be handled elsehwere. */ + if (i != 1) + { + delete_jump_thread_path (path); + e->aux = NULL; + } + break; + } + } + + if (i != path->length ()) + continue; } if (e->dest == e2->src) update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e), - e->count, (*THREAD_PATH (e))[1]->e); + e->count, (*THREAD_PATH (e))[1]->e); /* Insert the outgoing edge into the hash table if it is not already in the hash table. */ @@ -775,7 +911,7 @@ thread_block_1 (basic_block bb, bool noloop_only, bool joiners) By doing things this way we can be as aggressive as possible and not worry that copying a joiner block will create a jump threading opportunity. */ - + static bool thread_block (basic_block bb, bool noloop_only) { @@ -830,21 +966,21 @@ thread_single_edge (edge e) npath->safe_push (x); rd.path = npath; - create_block_for_threading (bb, &rd); - remove_ctrl_stmt_and_useless_edges (rd.dup_block, NULL); - create_edge_and_update_destination_phis (&rd, rd.dup_block); + create_block_for_threading (bb, &rd, 0); + remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL); + create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0]); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Threaded jump %d --> %d to %d\n", - e->src->index, e->dest->index, rd.dup_block->index); + e->src->index, e->dest->index, rd.dup_blocks[0]->index); - rd.dup_block->count = e->count; - rd.dup_block->frequency = EDGE_FREQUENCY (e); - single_succ_edge (rd.dup_block)->count = e->count; - redirect_edge_and_branch (e, rd.dup_block); + rd.dup_blocks[0]->count = e->count; + rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e); + single_succ_edge (rd.dup_blocks[0])->count = e->count; + redirect_edge_and_branch (e, rd.dup_blocks[0]); flush_pending_stmts (e); - return rd.dup_block; + return rd.dup_blocks[0]; } /* Callback for dfs_enumerate_from. Returns true if BB is different @@ -1025,11 +1161,22 @@ thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers) if (single_succ_p (header)) goto fail; + /* If we threaded the latch using a joiner block, we cancel the + threading opportunity out of an abundance of caution. However, + still allow threading from outside to inside the loop. */ if (latch->aux) { vec<jump_thread_edge *> *path = THREAD_PATH (latch); if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK) - goto fail; + { + delete_jump_thread_path (path); + latch->aux = NULL; + } + } + + if (latch->aux) + { + vec<jump_thread_edge *> *path = THREAD_PATH (latch); tgt_edge = (*path)[1]->e; tgt_bb = tgt_edge->dest; } @@ -1114,7 +1261,7 @@ thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers) unsigned nblocks, i; /* First handle the case latch edge is redirected. We are copying - the loop header but not creating a multiple entry loop. Make the + the loop header but not creating a multiple entry loop. Make the cfg manipulation code aware of that fact. */ set_loop_copy (loop, loop); loop->latch = thread_single_edge (latch); @@ -1426,7 +1573,6 @@ thread_through_all_blocks (bool may_peel_loop_headers) bitmap_iterator bi; bitmap threaded_blocks; struct loop *loop; - loop_iterator li; /* We must know about loops in order to preserve them. */ gcc_assert (current_loops != NULL); @@ -1454,7 +1600,7 @@ thread_through_all_blocks (bool may_peel_loop_headers) /* Then perform the threading through loop headers. We start with the innermost loop, so that the changes in cfg we perform won't affect further threading. */ - FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) { if (!loop->header || !bitmap_bit_p (threaded_blocks, loop->header->index)) @@ -1464,7 +1610,7 @@ thread_through_all_blocks (bool may_peel_loop_headers) } /* Assume we had a jump thread path which went from the latch to the exit - and a path which goes from outside to inside the same loop. + and a path which goes from outside to inside the same loop. If the latch to exit was handled first, we will thread it and clear loop->header. |