/* SSA Jump Threading
Copyright (C) 2005-2019 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "tree.h"
#include "gimple.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "params.h"
#include "tree-ssa-loop.h"
#include "cfganal.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
#include "stringpool.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "ssa-range.h"
#include "domwalk.h"
#include "graph.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
DEBUG_FUNCTION
void graphme()
{
system("rm -f /tmp/base.dot");
print_graph_cfg("/tmp/base", cfun);
system("~/bin/dotview");
}
// If NAME is defined in block BB, return the gimple statement pointer,
// otherwise return NULL>
inline gimple *
ssa_name_same_bb_p (tree name, basic_block bb)
{
gimple *g = SSA_NAME_DEF_STMT (name);
if (!g || gimple_bb (g) != bb)
return NULL;
return g;
}
class thread_ranger : public global_ranger
{
public:
thread_ranger ();
~thread_ranger ();
enum path_range_direction { FORWARD, REVERSE };
/* Evaluate statement G assuming entry only via edge E */
bool range_of_stmt_edge (irange& r, gimple *g, edge e);
bool path_range_list (irange &r, tree name, const vec &bbs,
enum path_range_direction, edge start_edge = NULL);
private:
bool path_range_list_reverse (irange &r, tree name, const vec &);
};
thread_ranger::thread_ranger ()
{
}
thread_ranger::~thread_ranger ()
{
}
// Attempt to evaluate NAME within the basic block it is defined assuming the
// block was entered via edge E.
bool
thread_ranger::range_of_stmt_edge (irange &r, gimple *g, edge e)
{
basic_block bb = gimple_bb (g);
/* The edge provided must be an incoming edge to this BB. */
gcc_assert (e->dest == bb);
// Note that since we are remaining within BB, we do not attempt to further
// evaluate any of the arguments of a PHI at this point.
// For the moment, just pick up any cheap edge information.
gphi *phi = dyn_cast (g);
if (phi)
{
tree arg;
gcc_assert (e->dest == bb);
arg = gimple_phi_arg_def (phi, e->dest_idx);
// Pick up anything simple we might know about the incoming edge.
if (TREE_CODE (arg) != SSA_NAME || !outgoing_edge_range_p (r, e, arg))
return range_of_expr (r, arg);
return true;
}
grange *stmt = dyn_cast(g);
if (!stmt)
return false;
irange range1, range2;
tree op = gimple_range_operand1 (stmt);
if (!valid_range_ssa_p (op) || !ssa_name_same_bb_p (op, bb) ||
!range_of_stmt_edge (range1, SSA_NAME_DEF_STMT (op), e))
if (!range_of_expr (range1, op))
return false;
op = gimple_range_operand2 (stmt);
if (!op)
return gimple_range_fold (stmt, r, range1);
if (!valid_range_ssa_p (op) || !ssa_name_same_bb_p (op, bb) ||
!range_of_stmt_edge (range2, SSA_NAME_DEF_STMT (op), e))
if (!range_of_expr (range2, op))
return false;
return gimple_range_fold (stmt, r, range1, range2);
}
// Calculate the known range for NAME on a path of basic blocks in
// BBS. If such a range exists, store it in R and return TRUE,
// otherwise return FALSE.
//
// DIR is FORWARD if BBS[0] is the definition and the last block is
// the use. DIR is REVERSE if the blocks are in reverse order.
//
// If there is an edge leading into this path that we'd like to take
// into account, such edge is START_EDGE. Otherwise, START_EDGE is
// set to NULL.
bool
thread_ranger::path_range_list (irange &r, tree name,
const vec &bbs,
enum path_range_direction dir, edge start_edge)
{
if (bbs.is_empty ())
return false;
/* If the first block defines NAME and it has meaningful range
information, use it, otherwise fall back to range for type.
Note: The first block may not always define NAME because we may
have pruned the paths such that the first block (bb1) is just the
first block that contains range info (bb99). For example:
bb1:
x = 55;
...
...
bb99:
if (x > blah).
*/
basic_block first_bb = dir == FORWARD ? bbs[0] : bbs[bbs.length () - 1];
gimple *def_stmt = SSA_NAME_DEF_STMT (name);
if (gimple_bb (def_stmt) == first_bb && start_edge)
{
if (!range_of_stmt_edge (r, def_stmt, start_edge))
if (!range_of_stmt (r, def_stmt))
return false;
}
else
range_on_entry (r, first_bb, name);
if (dir == REVERSE)
return path_range_list_reverse (r, name, bbs);
for (unsigned i = 1; i < bbs.length (); ++i)
{
edge e = find_edge (bbs[i - 1], bbs[i]);
gcc_assert (e);
irange redge;
if (outgoing_edge_range_p (redge, e, name))
r.intersect (redge);
}
return !r.varying_p ();
}
/* The same as above, but handle the case where BBS are a path of
basic blocks in reverse order.
BBS[0] is the USE of NAME.
BBS[LEN-1] is the DEF of NAME. */
bool
thread_ranger::path_range_list_reverse (irange &r, tree name,
const vec &bbs)
{
for (int i = bbs.length () - 1; i > 0; --i)
{
edge e = find_edge (bbs[i], bbs[i - 1]);
gcc_assert (e);
irange redge;
if (outgoing_edge_range_p (redge, e, name))
r.intersect (redge);
}
return !r.varying_p ();
}
/* Class to generate all paths from an SSA name to a use of NAME.
Note: we discard any paths greater than PARAM_MAX_FSM_THREAD_LENGTH.
Use it like this:
bb_paths p;
p.calculate (x_99, some_bb);
for (unsigned i = 0; i < p.length (); i++)
{
vec path = p[i];
do_funky_things (path);
}
*/
class bb_paths
{
/* All paths from DEF_BB to USE_BB. */
vec< vec > all_paths;
/* The SSA name we are interested in. */
tree name;
/* The BB defining NAME. */
basic_block def_bb;
/* The BB using NAME. */
basic_block use_bb;
/* One ranger for everything so ranges get cached. */
thread_ranger ranger;
void calculate_1 (vec &path, basic_block bb,
hash_set &visited);
void prune_duplicate_paths (void);
void prune_irrelevant_range_blocks (vec &path);
void dump_one_path (FILE *out, const vec &path);
public:
bb_paths ();
~bb_paths ();
tree get_name (void) { return name; }
void calculate (tree ssa, basic_block use);
/* Calculate the range for PATH and store it in R.
Return TRUE if R is not the entire range for type. */
bool range_of_path (irange &r, vec &path,
edge start_edge = NULL)
{
return ranger.path_range_list (r, name, path, thread_ranger::REVERSE,
start_edge);
}
/* Attempt to fold STMT given VAR and its known range VAR_RANGE.
Store the resulting range in R and return TRUE if R is non
empty. */
bool range_of_folded_stmt (irange &r, gimple *stmt, tree var,
const irange var_range)
{
return ranger.range_of_stmt_with_range (r, stmt, var, var_range);
}
/* Return the ultimate SSA name for which NAME depends on. */
tree terminal_name (void)
{
return ranger.terminal_name(name);
}
const vec &operator[] (unsigned i) const { return all_paths[i]; }
vec &operator[] (unsigned i) { return all_paths[i]; }
unsigned length () const { return all_paths.length (); }
void dump ();
};
bb_paths::bb_paths ()
{
all_paths = vNULL;
}
bb_paths::~bb_paths ()
{
for (unsigned i = 0; i < all_paths.length (); ++i)
all_paths[i].release ();
all_paths.release ();
}
/* Generate all paths from the definition of SSA to its USE.
Accumulate the paths in ALL_PATHS. */
void
bb_paths::calculate (tree ssa, basic_block use)
{
all_paths = vNULL;
name = ssa;
use_bb = use;
if (SSA_NAME_IS_DEFAULT_DEF (name))
def_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
else
def_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
/* A path where the use and the definition are in the same block, is
a path of one block, and can be handled specially. */
if (use_bb == def_bb)
{
vec v;
v.create (1);
v.quick_push (use_bb);
all_paths.safe_push (v);
return;
}
hash_set visited;
auto_vec path;
calculate_1 (path, use_bb, visited);
prune_duplicate_paths ();
}
/* Helper function for calculate(). Calculate all paths from DEF_BB
to BB and store them into ALL_PATHS.
PATH is the current path being accumulated. VISITED is a hash of
visited blocks. */
void
bb_paths::calculate_1 (vec &path, basic_block bb,
hash_set &visited)
{
if (!def_bb)
return;
/* Discard loops. */
if (visited.add (bb))
return;
if ((int)path.length () + 1 > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
return;
/* As an optimization, we disregard paths that cross loops. Since
profitable_jump_thread_path() will ignore them, we can avoid
putting them in the queue altogether. */
if (!path.is_empty () && path[0]->loop_father != bb->loop_father)
return;
path.safe_push (bb);
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, bb->preds)
{
/* If we reached the defining block, we're at the top, and have a
complete path. */
if (e->src == def_bb)
{
/* If we've already seen DEF_BB, we have a complete loop
back to DEF_BB. We discard loops, so... */
if (visited.contains (def_bb))
return;
/* As mentioned in profitable_jump_thread_path(), the last
entry in a path (DEF_BB in our case) represents the block
with an outgoing edge that will redirect to the jump
threading path. Thus, we don't care if DEF_BB lives in a
loop different than the rest of the path we are
accumulating. This is why we don't perform the
loop_father optimization at the beginning of this
function. */
/* Push the DEF_BB for completeness sake. */
path.safe_push (def_bb);
vec t = path.copy ();
all_paths.safe_push (t);
path.pop ();
}
else
calculate_1 (path, e->src, visited);
}
path.pop ();
visited.remove (bb);
}
/* If we have a path that generates range information for SSA, any
blocks until the first block that contains range information is
irrelevant for the range. Prune such blocks. */
void
bb_paths::prune_irrelevant_range_blocks (vec &path)
{
/* If the path is just one block, we have a path where the
definition and the use are in the same BB. In this case, there
is nothing to do. */
if (path.length () == 1)
return;
for (int i = path.length () - 1; i > 0; --i)
{
irange r;
edge e;
gcc_assert (e = find_edge (path[i], path[i - 1]));
if (ranger.outgoing_edge_range_p (r, e, name))
{
/* Remove anything that came before here. */
path.truncate (i + 1);
return;
}
}
path.truncate (0);
}
/* Clean-up paths by removing any irrelevant blocks in the paths
themselves and then removing any duplicate paths. */
void
bb_paths::prune_duplicate_paths (void)
{
if (all_paths.is_empty ())
return;
vec prev_path = vNULL;
vec< vec > new_all_paths;
/* If we prune any duplicates, new_all_paths will have some extra
memory allocated. We dont' care, as this won't live long. */
new_all_paths.create (all_paths.length ());
/* Get rid of useless blocks per path, and then accumulate all
non-duplicate paths into new_all_paths. Removing duplicates is
easy because all_paths is already sorted. */
for (unsigned i = 0; i < all_paths.length (); ++i)
{
vec path = all_paths[i];
prune_irrelevant_range_blocks (path);
if (path.is_empty ())
{
path.release ();
continue;
}
/* Is this a duplicate of the last known path? */
bool duplicate = false;
if (prev_path.length () == path.length ())
{
duplicate = true;
for (unsigned i = 0; i < path.length (); ++i)
if (path[i] != prev_path[i])
{
duplicate = false;
break;
}
}
if (duplicate)
path.release ();
else
{
prev_path = path;
new_all_paths.safe_push (path);
}
}
all_paths.release ();
all_paths = new_all_paths;
}
/* Helper function for bb_paths::dump to dump one PATH to OUT. */
void
bb_paths::dump_one_path (FILE *out, const vec &path)
{
fprintf (out, "\tpath: ");
if (path.is_empty ())
{
fprintf (out, "\n");
return;
}
for (int i = path.length () - 1; i > 0; --i)
{
fprintf (out, "bb%d", path[i]->index);
edge e = find_edge (path[i], path[i - 1]);
gcc_assert (e);
fprintf (out, " => ");
}
fprintf (out, "bb%d\n", path[0]->index);
}
/* Dump all available paths. */
void
bb_paths::dump ()
{
if (all_paths.is_empty ())
return;
fprintf (stderr, "range path to BB%d for SSA = ", use_bb->index);
print_generic_stmt (stderr, name, TDF_NONE);
for (unsigned i = 0; i < length (); ++i)
{
irange r;
vec path = all_paths[i];
dump_one_path (stderr, path);
if (range_of_path (r, path))
{
fprintf (stderr, "\t ");
r.dump ();
}
else
fprintf (stderr, "\n");
}
fprintf (stderr, "-----------------------------\n");
}
class thread_jumps
{
public:
void find_jump_threads_backwards (basic_block bb, bool speed_p);
private:
edge profitable_jump_thread_path (basic_block bbi, tree name,
const irange &range_for_name,
bool *creates_irreducible_loop);
void convert_and_register_current_path (edge taken_edge);
void register_jump_thread_path_if_profitable (tree name, const irange &irange,
basic_block def_bb);
void handle_assignment (gimple *stmt, tree name, basic_block def_bb);
void handle_phi (gphi *phi, tree name, basic_block def_bb);
void fsm_find_control_statement_thread_paths (tree name);
bool check_subpath_and_update_thread_path (basic_block last_bb,
basic_block new_bb,
int *next_path_length);
void find_range_based_jump_threads (tree name, basic_block bb);
bool maybe_register_range_based_phis (void);
void register_current_range_based_path (irange &r);
bool resolve_control_statement (gimple *stmt, tree name,
const irange &range_for_name, tree &result);
/* Maximum number of BBs we are allowed to thread. */
int m_max_threaded_paths;
/* Hash to keep track of seen bbs. */
hash_set m_visited_bbs;
/* Current path we're analyzing. */
auto_vec m_path;
/* Tracks if we have recursed through a loop PHI node. */
bool m_seen_loop_phi;
/* Indicate that we could increase code size to improve the
code path. */
bool m_speed_p;
/* All paths from a definition of an SSA name to a use of it. */
bb_paths m_all_paths_to_ssa;
};
/* Simple helper to get the last statement from BB, which is assumed
to be a control statement. Return NULL if the last statement is
not a control statement. */
static gimple *
get_gimple_control_stmt (basic_block bb)
{
gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
if (gsi_end_p (gsi))
return NULL;
gimple *stmt = gsi_stmt (gsi);
enum gimple_code code = gimple_code (stmt);
if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO)
return stmt;
return NULL;
}
/* Return true if the CFG contains at least one path from START_BB to
END_BB. When a path is found, record in PATH the blocks from
END_BB to START_BB. LOCAL_VISITED_BBS is used to make sure we
don't fall into an infinite loop. Bound the recursion to basic
blocks belonging to LOOP. */
static bool
fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
vec &path,
hash_set &local_visited_bbs,
loop_p loop)
{
if (loop != start_bb->loop_father)
return false;
if (start_bb == end_bb)
{
path.safe_push (start_bb);
return true;
}
if (!local_visited_bbs.add (start_bb))
{
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, start_bb->succs)
if (fsm_find_thread_path (e->dest, end_bb, path, local_visited_bbs,
loop))
{
path.safe_push (start_bb);
return true;
}
}
return false;
}
/* Try to evaluate the control statement and see if we can fold to a
constant. If we do, set RESULT to it and return TRUE.
Otherwise, return FALSE.
For GIMPLE_COND we evaluate the control statement and set the
substituted true/false value in RESULT.
For GIMPLE_SWITCH we set RESULT to NULL if evaluating the switch
will yield the default case. */
bool
thread_jumps::resolve_control_statement (gimple *stmt, tree name,
const irange &range_for_name,
tree &result)
{
switch (gimple_code (stmt))
{
case GIMPLE_COND:
{
/* It looks like NAME is not necessarily the same SSA name as
the LHS conditional here. Look at the LHS to make sure
the ranger gets the right range. */
tree var = gimple_cond_lhs (stmt);
irange r;
if (!m_all_paths_to_ssa.range_of_folded_stmt (r, stmt, var,
range_for_name)
|| !r.singleton_p (&result))
return false;
return true;
}
case GIMPLE_SWITCH:
{
/* Handle the simple case fast. */
if (range_for_name.singleton_p (&result))
return true;
gswitch *gs = as_a (stmt);
for (unsigned i = 1; i < gimple_switch_num_labels (gs); ++i)
{
tree label = gimple_switch_label (gs, i);
tree case_low = CASE_LOW (label);
tree case_high = CASE_HIGH (label);
if (!case_high)
case_high = case_low;
irange label_range (case_low, case_high);
range_cast (label_range, TREE_TYPE (name));
/* If NAME can fall into one of the switch cases, we can't
be sure where the switch will land. */
if (!range_intersect (range_for_name, label_range).undefined_p ())
return false;
/* If we have an exact match, for example, a case of 3..10
with a known range of [3,10], then we know we will
always select this case. Set RESULT to any number
within the range so find_taken_edge() can find the
right case.
?? Is this even worth the effort? */
if (range_for_name == label_range)
{
wide_int any_number_in_range = range_for_name.lower_bound ();
result = wide_int_to_tree (TREE_TYPE (name),
any_number_in_range);
return true;
}
}
/* If we couldn't find anything, the only alternative is that
we will always select the default case. Returning NULL
indicates that the switch will yield the default case. */
result = NULL;
return true;
}
case GIMPLE_GOTO:
if (!range_for_name.singleton_p (&result))
return false;
return true;
default:
gcc_unreachable ();
return false;
}
}
/* Examine jump threading path PATH to which we want to add BBI.
If the resulting path is profitable to thread, then return the
final taken edge from the path, NULL otherwise.
NAME is the SSA_NAME of the variable we found to have a constant
value on PATH. RANGE_FOR_NAME is the range of NAME on that path.
BBI will be appended to PATH when we have a profitable jump
threading path. Callers are responsible for removing BBI from PATH
in that case. */
edge
thread_jumps::profitable_jump_thread_path (basic_block bbi, tree name,
const irange &range_for_name,
bool *creates_irreducible_loop)
{
/* Note BBI is not in the path yet, hence the +1 in the test below
to make sure BBI is accounted for in the path length test. */
/* We can get a length of 0 here when the statement that
makes a conditional generate a compile-time constant
result is in the same block as the conditional.
That's not really a jump threading opportunity, but instead is
simple cprop & simplification. We could handle it here if we
wanted by wiring up all the incoming edges. If we run this
early in IPA, that might be worth doing. For now we just
reject that case. */
if (m_path.is_empty ())
return NULL;
if (m_path.length () + 1
> (unsigned) PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "FSM jump-thread path not considered: "
"the number of basic blocks on the path "
"exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
return NULL;
}
if (m_max_threaded_paths <= 0)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "FSM jump-thread path not considered: "
"the number of previously recorded FSM paths to "
"thread exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
return NULL;
}
if (range_for_name.undefined_p ())
return NULL;
gimple *stmt = get_gimple_control_stmt (m_path[0]);
tree arg;
edge taken_edge = NULL;
if (!resolve_control_statement (stmt, name, range_for_name, arg))
return NULL;
/* Special case in resolve_control_statement. NULL means take the
default case. */
if (gimple_code (stmt) == GIMPLE_SWITCH && arg == NULL)
{
tree taken_case = gimple_switch_default_label (as_a (stmt));
basic_block dest_bb = label_to_block (cfun, CASE_LABEL (taken_case));
taken_edge = find_edge (m_path[0], dest_bb);
}
/* Add BBI to the path.
From this point onward, if we decide we the path is not profitable
to thread, we must remove BBI from the path. */
m_path.safe_push (bbi);
int n_insns = 0;
gimple_stmt_iterator gsi;
loop_p loop = m_path[0]->loop_father;
bool path_crosses_loops = false;
bool threaded_through_latch = false;
bool multiway_branch_in_path = false;
bool threaded_multiway_branch = false;
bool contains_hot_bb = false;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Checking profitability of path (backwards): ");
/* Count the number of instructions on the path: as these instructions
will have to be duplicated, we will not record the path if there
are too many instructions on the path. Also check that all the
blocks in the path belong to a single loop. */
for (unsigned j = 0; j < m_path.length (); j++)
{
basic_block bb = m_path[j];
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " bb:%i", bb->index);
/* Remember, blocks in the path are stored in opposite order
in the PATH array. The last entry in the array represents
the block with an outgoing edge that we will redirect to the
jump threading path. Thus we don't care about that block's
loop father, nor how many statements are in that block because
it will not be copied or whether or not it ends in a multiway
branch. */
if (j < m_path.length () - 1)
{
int orig_n_insns = n_insns;
if (bb->loop_father != loop)
{
path_crosses_loops = true;
break;
}
/* PHIs in the path will create degenerate PHIS in the
copied path which will then get propagated away, so
looking at just the duplicate path the PHIs would
seem unimportant.
But those PHIs, because they're assignments to objects
typically with lives that exist outside the thread path,
will tend to generate PHIs (or at least new PHI arguments)
at points where we leave the thread path and rejoin
the original blocks. So we do want to account for them.
We ignore virtual PHIs. We also ignore cases where BB
has a single incoming edge. That's the most common
degenerate PHI we'll see here. Finally we ignore PHIs
that are associated with the value we're tracking as
that object likely dies. */
if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
{
for (gphi_iterator gsip = gsi_start_phis (bb);
!gsi_end_p (gsip);
gsi_next (&gsip))
{
gphi *phi = gsip.phi ();
tree dst = gimple_phi_result (phi);
/* Note that if both NAME and DST are anonymous
SSA_NAMEs, then we do not have enough information
to consider them associated. */
if (dst != name
&& (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
|| !SSA_NAME_VAR (dst))
&& !virtual_operand_p (dst))
++n_insns;
}
}
if (!contains_hot_bb && m_speed_p)
contains_hot_bb |= optimize_bb_for_speed_p (bb);
for (gsi = gsi_after_labels (bb);
!gsi_end_p (gsi);
gsi_next_nondebug (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (gimple_call_internal_p (stmt, IFN_UNIQUE))
{
m_path.pop ();
return NULL;
}
/* Do not count empty statements and labels. */
if (gimple_code (stmt) != GIMPLE_NOP
&& !(gimple_code (stmt) == GIMPLE_ASSIGN
&& gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
&& !is_gimple_debug (stmt))
n_insns += estimate_num_insns (stmt, &eni_size_weights);
}
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns);
/* We do not look at the block with the threaded branch
in this loop. So if any block with a last statement that
is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
multiway branch on our path.
The block in PATH[0] is special, it's the block were we're
going to be able to eliminate its branch. */
gimple *last = last_stmt (bb);
if (last && (gimple_code (last) == GIMPLE_SWITCH
|| gimple_code (last) == GIMPLE_GOTO))
{
if (j == 0)
threaded_multiway_branch = true;
else
multiway_branch_in_path = true;
}
}
/* Note if we thread through the latch, we will want to include
the last entry in the array when determining if we thread
through the loop latch. */
if (loop->latch == bb)
threaded_through_latch = true;
}
/* We are going to remove the control statement at the end of the
last block in the threading path. So don't count it against our
statement count. */
int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
n_insns-= stmt_insns;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n Control statement insns: %i\n"
" Overall: %i insns\n",
stmt_insns, n_insns);
/* If this path threaded through the loop latch back into the
same loop and the destination does not dominate the loop
latch, then this thread would create an irreducible loop.
We have to know the outgoing edge to figure this out. */
if (!taken_edge)
taken_edge = find_taken_edge (m_path[0], arg);
/* There are cases where we may not be able to extract the
taken edge. For example, a computed goto to an absolute
address. Handle those cases gracefully. */
if (taken_edge == NULL)
{
m_path.pop ();
return NULL;
}
*creates_irreducible_loop = false;
if (threaded_through_latch
&& loop == taken_edge->dest->loop_father
&& (determine_bb_domination_status (loop, taken_edge->dest)
== DOMST_NONDOMINATING))
*creates_irreducible_loop = true;
if (path_crosses_loops)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "FSM jump-thread path not considered: "
"the path crosses loops.\n");
m_path.pop ();
return NULL;
}
/* Threading is profitable if the path duplicated is hot but also
in a case we separate cold path from hot path and permit optimization
of the hot path later. Be on the agressive side here. In some testcases,
as in PR 78407 this leads to noticeable improvements. */
if (m_speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb))
{
if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "FSM jump-thread path not considered: "
"the number of instructions on the path "
"exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
m_path.pop ();
return NULL;
}
}
else if (n_insns > 1)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "FSM jump-thread path not considered: "
"duplication of %i insns is needed and optimizing for size.\n",
n_insns);
m_path.pop ();
return NULL;
}
/* We avoid creating irreducible inner loops unless we thread through
a multiway branch, in which case we have deemed it worth losing
other loop optimizations later.
We also consider it worth creating an irreducible inner loop if
the number of copied statement is low relative to the length of
the path -- in that case there's little the traditional loop
optimizer would have done anyway, so an irreducible loop is not
so bad. */
if (!threaded_multiway_branch && *creates_irreducible_loop
&& (n_insns * (unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
> (m_path.length () *
(unsigned) PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS))))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"FSM would create irreducible loop without threading "
"multiway branch.\n");
m_path.pop ();
return NULL;
}
/* If this path does not thread through the loop latch, then we are
using the FSM threader to find old style jump threads. This
is good, except the FSM threader does not re-use an existing
threading path to reduce code duplication.
So for that case, drastically reduce the number of statements
we are allowed to copy. */
if (!(threaded_through_latch && threaded_multiway_branch)
&& (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
>= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS)))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"FSM did not thread around loop and would copy too "
"many statements (%d) [max=%d].\n",
n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS),
PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS));
m_path.pop ();
return NULL;
}
/* When there is a multi-way branch on the path, then threading can
explode the CFG due to duplicating the edges for that multi-way
branch. So like above, only allow a multi-way branch on the path
if we actually thread a multi-way branch. */
if (!threaded_multiway_branch && multiway_branch_in_path)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"FSM Thread through multiway branch without threading "
"a multiway branch.\n");
m_path.pop ();
return NULL;
}
return taken_edge;
}
/* The current path PATH is a vector of blocks forming a jump threading
path in reverse order. TAKEN_EDGE is the edge taken from path[0].
Convert the current path into the form used by register_jump_thread and
register it. */
void
thread_jumps::convert_and_register_current_path (edge taken_edge)
{
vec *jump_thread_path = new vec ();
/* Record the edges between the blocks in PATH. */
for (unsigned int j = 0; j + 1 < m_path.length (); j++)
{
basic_block bb1 = m_path[m_path.length () - j - 1];
basic_block bb2 = m_path[m_path.length () - j - 2];
edge e = find_edge (bb1, bb2);
gcc_assert (e);
jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
jump_thread_path->safe_push (x);
}
/* Add the edge taken when the control variable has value ARG. */
jump_thread_edge *x
= new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
jump_thread_path->safe_push (x);
register_jump_thread (jump_thread_path);
--m_max_threaded_paths;
}
/* While following a chain of SSA_NAME definitions, we jumped from a
definition in LAST_BB to a definition in NEW_BB (walking
backwards).
Verify there is a single path between the blocks and none of the
blocks in the path is already in VISITED_BBS. If so, then update
VISISTED_BBS, add the new blocks to PATH and return TRUE.
Otherwise return FALSE.
Store the length of the subpath in NEXT_PATH_LENGTH. */
bool
thread_jumps::check_subpath_and_update_thread_path (basic_block last_bb,
basic_block new_bb,
int *next_path_length)
{
edge e;
int e_count = 0;
edge_iterator ei;
auto_vec next_path;
FOR_EACH_EDGE (e, ei, last_bb->preds)
{
hash_set local_visited_bbs;
if (fsm_find_thread_path (new_bb, e->src, next_path,
local_visited_bbs, e->src->loop_father))
++e_count;
/* If there is more than one path, stop. */
if (e_count > 1)
return false;
}
/* Stop if we have not found a path: this could occur when the recursion
is stopped by one of the bounds. */
if (e_count == 0)
return false;
/* Make sure we haven't already visited any of the nodes in
NEXT_PATH. Don't add them here to avoid pollution. */
for (unsigned int i = 0; i + 1 < next_path.length (); i++)
{
if (m_visited_bbs.contains (next_path[i]))
return false;
}
/* Now add the nodes to VISISTED_BBS. */
for (unsigned int i = 0; i + 1 < next_path.length (); i++)
m_visited_bbs.add (next_path[i]);
/* Append all the nodes from NEXT_PATH to PATH. */
m_path.safe_splice (next_path);
*next_path_length = next_path.length ();
return true;
}
/* If this is a profitable jump thread path, register it.
NAME is an SSA NAME with a possible constant value of ARG on PATH.
DEF_BB is the basic block that ultimately defines the constant. */
void
thread_jumps::register_jump_thread_path_if_profitable (tree name,
const irange &range,
basic_block def_bb)
{
if (range.undefined_p ())
return;
bool irreducible = false;
edge taken_edge = profitable_jump_thread_path (def_bb, name, range,
&irreducible);
if (taken_edge)
{
convert_and_register_current_path (taken_edge);
m_path.pop ();
if (irreducible)
vect_free_loop_info_assumptions (m_path[0]->loop_father);
}
}
/* Given PHI which defines NAME in block DEF_BB, recurse through the
PHI's arguments searching for paths where NAME will ultimately have
a constant value.
M_PATH contains the series of blocks to traverse that will result in
NAME having a constant value. */
void
thread_jumps::handle_phi (gphi *phi, tree name, basic_block def_bb)
{
/* Iterate over the arguments of PHI. */
for (unsigned int i = 0; i < gimple_phi_num_args (phi); i++)
{
tree arg = gimple_phi_arg_def (phi, i);
basic_block bbi = gimple_phi_arg_edge (phi, i)->src;
/* Skip edges pointing outside the current loop. */
if (!arg || def_bb->loop_father != bbi->loop_father)
continue;
if (TREE_CODE (arg) == SSA_NAME)
{
m_path.safe_push (bbi);
/* Recursively follow SSA_NAMEs looking for a constant
definition. */
fsm_find_control_statement_thread_paths (arg);
m_path.pop ();
continue;
}
if (TREE_CODE_CLASS (TREE_CODE (arg)) == tcc_constant)
{
irange range (arg, arg);
register_jump_thread_path_if_profitable (name, range, bbi);
}
}
}
/* Return TRUE if STMT is a gimple assignment we want to either directly
handle or recurse through. Return FALSE otherwise.
Note that adding more cases here requires adding cases to handle_assignment
below. */
static bool
handle_assignment_p (gimple *stmt)
{
if (is_gimple_assign (stmt))
{
enum tree_code def_code = gimple_assign_rhs_code (stmt);
/* If the RHS is an SSA_NAME, then we will recurse through it.
Go ahead and filter out cases where the SSA_NAME is a default
definition. There's little to be gained by trying to handle that. */
if (def_code == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (gimple_assign_rhs1 (stmt)))
return true;
/* If the RHS is a constant, then it's a terminal that we'll want
to handle as well. */
if (TREE_CODE_CLASS (def_code) == tcc_constant)
return true;
}
/* Anything not explicitly allowed is not handled. */
return false;
}
/* Given STMT which defines NAME in block DEF_BB, recurse through the
PHI's arguments searching for paths where NAME will ultimately have
a constant value.
PATH contains the series of blocks to traverse that will result in
NAME having a constant value. */
void
thread_jumps::handle_assignment (gimple *stmt, tree name, basic_block def_bb)
{
tree arg = gimple_assign_rhs1 (stmt);
if (TREE_CODE (arg) == SSA_NAME)
fsm_find_control_statement_thread_paths (arg);
else
{
/* register_jump_thread_path_if_profitable will push the current
block onto the path. But the path will always have the current
block at this point. So we can just pop it. */
m_path.pop ();
if (TREE_CODE_CLASS (TREE_CODE (arg)) == tcc_constant)
{
irange range (arg, arg);
register_jump_thread_path_if_profitable (name, range, def_bb);
}
/* And put the current block back onto the path so that the
state of the stack is unchanged when we leave. */
m_path.safe_push (def_bb);
}
}
/* We trace the value of the SSA_NAME NAME back through any phi nodes
looking for places where it gets a constant value and save the
path. */
void
thread_jumps::fsm_find_control_statement_thread_paths (tree name)
{
/* If NAME appears in an abnormal PHI, then don't try to trace its
value back through PHI nodes. */
if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
return;
gimple *def_stmt = SSA_NAME_DEF_STMT (name);
basic_block def_bb = gimple_bb (def_stmt);
if (def_bb == NULL)
return;
/* We allow the SSA chain to contains PHIs and simple copies and constant
initializations. */
if (gimple_code (def_stmt) != GIMPLE_PHI
&& gimple_code (def_stmt) != GIMPLE_ASSIGN)
return;
if (gimple_code (def_stmt) == GIMPLE_PHI
&& (gimple_phi_num_args (def_stmt)
>= (unsigned) PARAM_VALUE (PARAM_FSM_MAXIMUM_PHI_ARGUMENTS)))
return;
if (is_gimple_assign (def_stmt)
&& ! handle_assignment_p (def_stmt))
return;
/* Avoid infinite recursion. */
if (m_visited_bbs.add (def_bb))
return;
int next_path_length = 0;
basic_block last_bb_in_path = m_path.last ();
if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt))
{
/* Do not walk through more than one loop PHI node. */
if (m_seen_loop_phi)
return;
m_seen_loop_phi = true;
}
/* Following the chain of SSA_NAME definitions, we jumped from a definition in
LAST_BB_IN_PATH to a definition in DEF_BB. When these basic blocks are
different, append to PATH the blocks from LAST_BB_IN_PATH to DEF_BB. */
if (def_bb != last_bb_in_path)
{
/* When DEF_BB == LAST_BB_IN_PATH, then the first block in the path
will already be in VISITED_BBS. When they are not equal, then we
must ensure that first block is accounted for to ensure we do not
create bogus jump threading paths. */
m_visited_bbs.add (m_path[0]);
if (!check_subpath_and_update_thread_path (last_bb_in_path, def_bb,
&next_path_length))
return;
}
gcc_assert (m_path.last () == def_bb);
if (gimple_code (def_stmt) == GIMPLE_PHI)
handle_phi (as_a (def_stmt), name, def_bb);
else if (gimple_code (def_stmt) == GIMPLE_ASSIGN)
handle_assignment (def_stmt, name, def_bb);
/* Remove all the nodes that we added from NEXT_PATH. */
if (next_path_length)
m_path.truncate (m_path.length () - next_path_length);
}
/* Register a range based thread in M_PATH.
R is the known range for said path. */
void
thread_jumps::register_current_range_based_path (irange &r)
{
tree name = m_all_paths_to_ssa.get_name ();
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file,
"Considering range-based path for jump threading: "
"SSA = ");
print_generic_stmt (dump_file, name, TDF_NONE);
fprintf (dump_file, "\tRange is: ");
r.dump (dump_file);
}
/* register_jump_thread_path_if_profitable will push
the current block onto the path. But the path
will always have the current block at this point.
So we can just pop it. */
basic_block def_bb = m_path.pop ();
register_jump_thread_path_if_profitable (m_all_paths_to_ssa.get_name (),
r, def_bb);
}
/* If M_PATH is not threadable, but taking into account the PHI that
feeds into it makes it threadable, thread it and return TRUE.
This is a special case we get for cheap without looking back deep
through PHIs. For example:
# v1_595 = PHI
_14 = v1_595 * 3600;
_15 = (unsigned int) _14;
_16 = _15 / 60;
...
if (_16 > 123)
Even though paths through bb5->bb99 are not threadable just taking
_16 into account, if we take into account the path coming in
through bb4, we can thread. This special cases PHIs that feed into
and are defined in the same BB as the range variable. */
bool
thread_jumps::maybe_register_range_based_phis ()
{
tree terminal = m_all_paths_to_ssa.terminal_name ();
if (!terminal)
return false;
/* See if NAME depends on a PHI that is defined in the same BB as
NAME. */
gimple *terminal_def = SSA_NAME_DEF_STMT (terminal);
basic_block terminal_bb = gimple_bb (terminal_def);
basic_block def_bb = m_path[m_path.length () - 1];
if (gimple_code (terminal_def) != GIMPLE_PHI
|| def_bb != terminal_bb)
return false;
/* If this PHI has any constant arguments, thread this range along
with the PHI edge that has the constant. */
bool ret = false;
gphi *phi = as_a (terminal_def);
for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
{
tree arg = gimple_phi_arg_def (phi, i);
edge e = gimple_phi_arg_edge (phi, i);
irange r;
/* If we're threading across loop boundaries, children will die. */
if (e->src->loop_father != e->dest->loop_father)
continue;
if (TREE_CODE_CLASS (TREE_CODE (arg)) == tcc_constant
&& m_all_paths_to_ssa.range_of_path (r, m_path, e))
{
m_path.safe_push (e->src);
register_current_range_based_path (r);
return true;
}
}
return ret;
}
/* Record jump threads based on range information.
It is assumed that BB ends with a control statement and that by
finding a path where NAME is a constant, we can thread the path. */
void
thread_jumps::find_range_based_jump_threads (tree name, basic_block bb)
{
if (!INTEGRAL_TYPE_P (TREE_TYPE (name)))
return;
m_all_paths_to_ssa.calculate (name, bb);
for (unsigned i = 0; i < m_all_paths_to_ssa.length (); ++i)
{
vec p = m_all_paths_to_ssa[i];
m_path.create (p.length ());
for (unsigned j = 0; j < p.length (); j++)
m_path.safe_push (p[j]);
/* Prefer paths containing a PHI with constant arguments, as
these paths, starting off with a constant, have a higher
chance of degrading into a singleton range. */
if (maybe_register_range_based_phis ())
continue;
irange r;
if (m_all_paths_to_ssa.range_of_path (r, m_path))
register_current_range_based_path (r);
}
}
/* Search backwards from BB looking for paths where NAME (an SSA_NAME)
is a constant. Record such paths for jump threading.
It is assumed that BB ends with a control statement and that by
finding a path where NAME is a constant, we can thread the path.
SPEED_P indicates that we could increase code size to improve the
code path. */
void
thread_jumps::find_jump_threads_backwards (basic_block bb, bool speed_p)
{
gimple *stmt = get_gimple_control_stmt (bb);
if (!stmt)
return;
enum gimple_code code = gimple_code (stmt);
tree name = NULL;
if (code == GIMPLE_SWITCH)
name = gimple_switch_index (as_a (stmt));
else if (code == GIMPLE_GOTO)
name = gimple_goto_dest (stmt);
else if (code == GIMPLE_COND)
{
if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
&& TREE_CODE_CLASS (TREE_CODE (gimple_cond_rhs (stmt))) == tcc_constant
&& (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))
|| POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (stmt)))))
name = gimple_cond_lhs (stmt);
}
if (!name || TREE_CODE (name) != SSA_NAME)
return;
/* Initialize pass local data that's different for each BB. */
m_path.truncate (0);
m_path.safe_push (bb);
m_visited_bbs.empty ();
m_seen_loop_phi = false;
m_speed_p = speed_p;
m_max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS);
fsm_find_control_statement_thread_paths (name);
/* If we didn't thread any paths, try threading by making use of
available range information. */
if (PARAM_VALUE (PARAM_FSM_RANGE_BASED_THREADING)
&& m_max_threaded_paths == PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS))
find_range_based_jump_threads (name, bb);
}
namespace {
const pass_data pass_data_thread_jumps =
{
GIMPLE_PASS,
"thread",
OPTGROUP_NONE,
TV_TREE_SSA_THREAD_JUMPS,
( PROP_cfg | PROP_ssa ),
0,
0,
0,
TODO_update_ssa,
};
class pass_thread_jumps : public gimple_opt_pass
{
public:
pass_thread_jumps (gcc::context *ctxt)
: gimple_opt_pass (pass_data_thread_jumps, ctxt)
{}
opt_pass * clone (void) { return new pass_thread_jumps (m_ctxt); }
virtual bool gate (function *);
virtual unsigned int execute (function *);
};
bool
pass_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
{
return flag_expensive_optimizations;
}
unsigned int
pass_thread_jumps::execute (function *fun)
{
loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
/* Try to thread each block with more than one successor. */
thread_jumps threader;
basic_block bb;
FOR_EACH_BB_FN (bb, fun)
{
if (EDGE_COUNT (bb->succs) > 1)
threader.find_jump_threads_backwards (bb, true);
}
bool changed = thread_through_all_blocks (true);
loop_optimizer_finalize ();
return changed ? TODO_cleanup_cfg : 0;
}
}
gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
return new pass_thread_jumps (ctxt);
}
namespace {
const pass_data pass_data_early_thread_jumps =
{
GIMPLE_PASS,
"ethread",
OPTGROUP_NONE,
TV_TREE_SSA_THREAD_JUMPS,
( PROP_cfg | PROP_ssa ),
0,
0,
0,
( TODO_cleanup_cfg | TODO_update_ssa ),
};
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
pass_early_thread_jumps (gcc::context *ctxt)
: gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
{}
opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); }
virtual bool gate (function *);
virtual unsigned int execute (function *);
};
bool
pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED)
{
return true;
}
unsigned int
pass_early_thread_jumps::execute (function *fun)
{
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
/* Try to thread each block with more than one successor. */
thread_jumps threader;
basic_block bb;
FOR_EACH_BB_FN (bb, fun)
{
if (EDGE_COUNT (bb->succs) > 1)
threader.find_jump_threads_backwards (bb, false);
}
thread_through_all_blocks (true);
loop_optimizer_finalize ();
return 0;
}
}
gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
return new pass_early_thread_jumps (ctxt);
}