/* Passes for transactional memory support.
Copyright (C) 2008-2013 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tree.h"
#include "gimple.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "tree-inline.h"
#include "diagnostic-core.h"
#include "demangle.h"
#include "output.h"
#include "trans-mem.h"
#include "params.h"
#include "target.h"
#include "langhooks.h"
#include "gimple-pretty-print.h"
#include "cfgloop.h"
#define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
#define PROB_VERY_LIKELY (PROB_ALWAYS - PROB_VERY_UNLIKELY)
#define PROB_UNLIKELY (REG_BR_PROB_BASE / 5 - 1)
#define PROB_LIKELY (PROB_ALWAYS - PROB_VERY_LIKELY)
#define PROB_ALWAYS (REG_BR_PROB_BASE)
#define A_RUNINSTRUMENTEDCODE 0x0001
#define A_RUNUNINSTRUMENTEDCODE 0x0002
#define A_SAVELIVEVARIABLES 0x0004
#define A_RESTORELIVEVARIABLES 0x0008
#define A_ABORTTRANSACTION 0x0010
#define AR_USERABORT 0x0001
#define AR_USERRETRY 0x0002
#define AR_TMCONFLICT 0x0004
#define AR_EXCEPTIONBLOCKABORT 0x0008
#define AR_OUTERABORT 0x0010
#define MODE_SERIALIRREVOCABLE 0x0000
/* The representation of a transaction changes several times during the
lowering process. In the beginning, in the front-end we have the
GENERIC tree TRANSACTION_EXPR. For example,
__transaction {
local++;
if (++global == 10)
__tm_abort;
}
During initial gimplification (gimplify.c) the TRANSACTION_EXPR node is
trivially replaced with a GIMPLE_TRANSACTION node.
During pass_lower_tm, we examine the body of transactions looking
for aborts. Transactions that do not contain an abort may be
merged into an outer transaction. We also add a TRY-FINALLY node
to arrange for the transaction to be committed on any exit.
[??? Think about how this arrangement affects throw-with-commit
and throw-with-abort operations. In this case we want the TRY to
handle gotos, but not to catch any exceptions because the transaction
will already be closed.]
GIMPLE_TRANSACTION [label=NULL] {
try {
local = local + 1;
t0 = global;
t1 = t0 + 1;
global = t1;
if (t1 == 10)
__builtin___tm_abort ();
} finally {
__builtin___tm_commit ();
}
}
During pass_lower_eh, we create EH regions for the transactions,
intermixed with the regular EH stuff. This gives us a nice persistent
mapping (all the way through rtl) from transactional memory operation
back to the transaction, which allows us to get the abnormal edges
correct to model transaction aborts and restarts:
GIMPLE_TRANSACTION [label=over]
local = local + 1;
t0 = global;
t1 = t0 + 1;
global = t1;
if (t1 == 10)
__builtin___tm_abort ();
__builtin___tm_commit ();
over:
This is the end of all_lowering_passes, and so is what is present
during the IPA passes, and through all of the optimization passes.
During pass_ipa_tm, we examine all GIMPLE_TRANSACTION blocks in all
functions and mark functions for cloning.
At the end of gimple optimization, before exiting SSA form,
pass_tm_edges replaces statements that perform transactional
memory operations with the appropriate TM builtins, and swap
out function calls with their transactional clones. At this
point we introduce the abnormal transaction restart edges and
complete lowering of the GIMPLE_TRANSACTION node.
x = __builtin___tm_start (MAY_ABORT);
eh_label:
if (x & abort_transaction)
goto over;
local = local + 1;
t0 = __builtin___tm_load (global);
t1 = t0 + 1;
__builtin___tm_store (&global, t1);
if (t1 == 10)
__builtin___tm_abort ();
__builtin___tm_commit ();
over:
*/
static void *expand_regions (struct tm_region *,
void *(*callback)(struct tm_region *, void *),
void *, bool);
/* Return the attributes we want to examine for X, or NULL if it's not
something we examine. We look at function types, but allow pointers
to function types and function decls and peek through. */
static tree
get_attrs_for (const_tree x)
{
switch (TREE_CODE (x))
{
case FUNCTION_DECL:
return TYPE_ATTRIBUTES (TREE_TYPE (x));
break;
default:
if (TYPE_P (x))
return NULL;
x = TREE_TYPE (x);
if (TREE_CODE (x) != POINTER_TYPE)
return NULL;
/* FALLTHRU */
case POINTER_TYPE:
x = TREE_TYPE (x);
if (TREE_CODE (x) != FUNCTION_TYPE && TREE_CODE (x) != METHOD_TYPE)
return NULL;
/* FALLTHRU */
case FUNCTION_TYPE:
case METHOD_TYPE:
return TYPE_ATTRIBUTES (x);
}
}
/* Return true if X has been marked TM_PURE. */
bool
is_tm_pure (const_tree x)
{
unsigned flags;
switch (TREE_CODE (x))
{
case FUNCTION_DECL:
case FUNCTION_TYPE:
case METHOD_TYPE:
break;
default:
if (TYPE_P (x))
return false;
x = TREE_TYPE (x);
if (TREE_CODE (x) != POINTER_TYPE)
return false;
/* FALLTHRU */
case POINTER_TYPE:
x = TREE_TYPE (x);
if (TREE_CODE (x) != FUNCTION_TYPE && TREE_CODE (x) != METHOD_TYPE)
return false;
break;
}
flags = flags_from_decl_or_type (x);
return (flags & ECF_TM_PURE) != 0;
}
/* Return true if X has been marked TM_IRREVOCABLE. */
static bool
is_tm_irrevocable (tree x)
{
tree attrs = get_attrs_for (x);
if (attrs && lookup_attribute ("transaction_unsafe", attrs))
return true;
/* A call to the irrevocable builtin is by definition,
irrevocable. */
if (TREE_CODE (x) == ADDR_EXPR)
x = TREE_OPERAND (x, 0);
if (TREE_CODE (x) == FUNCTION_DECL
&& DECL_BUILT_IN_CLASS (x) == BUILT_IN_NORMAL
&& DECL_FUNCTION_CODE (x) == BUILT_IN_TM_IRREVOCABLE)
return true;
return false;
}
/* Return true if X has been marked TM_SAFE. */
bool
is_tm_safe (const_tree x)
{
if (flag_tm)
{
tree attrs = get_attrs_for (x);
if (attrs)
{
if (lookup_attribute ("transaction_safe", attrs))
return true;
if (lookup_attribute ("transaction_may_cancel_outer", attrs))
return true;
}
}
return false;
}
/* Return true if CALL is const, or tm_pure. */
static bool
is_tm_pure_call (gimple call)
{
tree fn = gimple_call_fn (call);
if (TREE_CODE (fn) == ADDR_EXPR)
{
fn = TREE_OPERAND (fn, 0);
gcc_assert (TREE_CODE (fn) == FUNCTION_DECL);
}
else
fn = TREE_TYPE (fn);
return is_tm_pure (fn);
}
/* Return true if X has been marked TM_CALLABLE. */
static bool
is_tm_callable (tree x)
{
tree attrs = get_attrs_for (x);
if (attrs)
{
if (lookup_attribute ("transaction_callable", attrs))
return true;
if (lookup_attribute ("transaction_safe", attrs))
return true;
if (lookup_attribute ("transaction_may_cancel_outer", attrs))
return true;
}
return false;
}
/* Return true if X has been marked TRANSACTION_MAY_CANCEL_OUTER. */
bool
is_tm_may_cancel_outer (tree x)
{
tree attrs = get_attrs_for (x);
if (attrs)
return lookup_attribute ("transaction_may_cancel_outer", attrs) != NULL;
return false;
}
/* Return true for built in functions that "end" a transaction. */
bool
is_tm_ending_fndecl (tree fndecl)
{
if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
switch (DECL_FUNCTION_CODE (fndecl))
{
case BUILT_IN_TM_COMMIT:
case BUILT_IN_TM_COMMIT_EH:
case BUILT_IN_TM_ABORT:
case BUILT_IN_TM_IRREVOCABLE:
return true;
default:
break;
}
return false;
}
/* Return true if STMT is a TM load. */
static bool
is_tm_load (gimple stmt)
{
tree fndecl;
if (gimple_code (stmt) != GIMPLE_CALL)
return false;
fndecl = gimple_call_fndecl (stmt);
return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
&& BUILTIN_TM_LOAD_P (DECL_FUNCTION_CODE (fndecl)));
}
/* Same as above, but for simple TM loads, that is, not the
after-write, after-read, etc optimized variants. */
static bool
is_tm_simple_load (gimple stmt)
{
tree fndecl;
if (gimple_code (stmt) != GIMPLE_CALL)
return false;
fndecl = gimple_call_fndecl (stmt);
if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
{
enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
return (fcode == BUILT_IN_TM_LOAD_1
|| fcode == BUILT_IN_TM_LOAD_2
|| fcode == BUILT_IN_TM_LOAD_4
|| fcode == BUILT_IN_TM_LOAD_8
|| fcode == BUILT_IN_TM_LOAD_FLOAT
|| fcode == BUILT_IN_TM_LOAD_DOUBLE
|| fcode == BUILT_IN_TM_LOAD_LDOUBLE
|| fcode == BUILT_IN_TM_LOAD_M64
|| fcode == BUILT_IN_TM_LOAD_M128
|| fcode == BUILT_IN_TM_LOAD_M256);
}
return false;
}
/* Return true if STMT is a TM store. */
static bool
is_tm_store (gimple stmt)
{
tree fndecl;
if (gimple_code (stmt) != GIMPLE_CALL)
return false;
fndecl = gimple_call_fndecl (stmt);
return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
&& BUILTIN_TM_STORE_P (DECL_FUNCTION_CODE (fndecl)));
}
/* Same as above, but for simple TM stores, that is, not the
after-write, after-read, etc optimized variants. */
static bool
is_tm_simple_store (gimple stmt)
{
tree fndecl;
if (gimple_code (stmt) != GIMPLE_CALL)
return false;
fndecl = gimple_call_fndecl (stmt);
if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
{
enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
return (fcode == BUILT_IN_TM_STORE_1
|| fcode == BUILT_IN_TM_STORE_2
|| fcode == BUILT_IN_TM_STORE_4
|| fcode == BUILT_IN_TM_STORE_8
|| fcode == BUILT_IN_TM_STORE_FLOAT
|| fcode == BUILT_IN_TM_STORE_DOUBLE
|| fcode == BUILT_IN_TM_STORE_LDOUBLE
|| fcode == BUILT_IN_TM_STORE_M64
|| fcode == BUILT_IN_TM_STORE_M128
|| fcode == BUILT_IN_TM_STORE_M256);
}
return false;
}
/* Return true if FNDECL is BUILT_IN_TM_ABORT. */
static bool
is_tm_abort (tree fndecl)
{
return (fndecl
&& DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
&& DECL_FUNCTION_CODE (fndecl) == BUILT_IN_TM_ABORT);
}
/* Build a GENERIC tree for a user abort. This is called by front ends
while transforming the __tm_abort statement. */
tree
build_tm_abort_call (location_t loc, bool is_outer)
{
return build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TM_ABORT), 1,
build_int_cst (integer_type_node,
AR_USERABORT
| (is_outer ? AR_OUTERABORT : 0)));
}
/* Common gateing function for several of the TM passes. */
static bool
gate_tm (void)
{
return flag_tm;
}
/* Map for aribtrary function replacement under TM, as created
by the tm_wrap attribute. */
static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
htab_t tm_wrap_map;
void
record_tm_replacement (tree from, tree to)
{
struct tree_map **slot, *h;
/* Do not inline wrapper functions that will get replaced in the TM
pass.
Suppose you have foo() that will get replaced into tmfoo(). Make
sure the inliner doesn't try to outsmart us and inline foo()
before we get a chance to do the TM replacement. */
DECL_UNINLINABLE (from) = 1;
if (tm_wrap_map == NULL)
tm_wrap_map = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
h = ggc_alloc_tree_map ();
h->hash = htab_hash_pointer (from);
h->base.from = from;
h->to = to;
slot = (struct tree_map **)
htab_find_slot_with_hash (tm_wrap_map, h, h->hash, INSERT);
*slot = h;
}
/* Return a TM-aware replacement function for DECL. */
static tree
find_tm_replacement_function (tree fndecl)
{
if (tm_wrap_map)
{
struct tree_map *h, in;
in.base.from = fndecl;
in.hash = htab_hash_pointer (fndecl);
h = (struct tree_map *) htab_find_with_hash (tm_wrap_map, &in, in.hash);
if (h)
return h->to;
}
/* ??? We may well want TM versions of most of the common
functions. For now, we've already these two defined. */
/* Adjust expand_call_tm() attributes as necessary for the cases
handled here: */
if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
switch (DECL_FUNCTION_CODE (fndecl))
{
case BUILT_IN_MEMCPY:
return builtin_decl_explicit (BUILT_IN_TM_MEMCPY);
case BUILT_IN_MEMMOVE:
return builtin_decl_explicit (BUILT_IN_TM_MEMMOVE);
case BUILT_IN_MEMSET:
return builtin_decl_explicit (BUILT_IN_TM_MEMSET);
default:
return NULL;
}
return NULL;
}
/* When appropriate, record TM replacement for memory allocation functions.
FROM is the FNDECL to wrap. */
void
tm_malloc_replacement (tree from)
{
const char *str;
tree to;
if (TREE_CODE (from) != FUNCTION_DECL)
return;
/* If we have a previous replacement, the user must be explicitly
wrapping malloc/calloc/free. They better know what they're
doing... */
if (find_tm_replacement_function (from))
return;
str = IDENTIFIER_POINTER (DECL_NAME (from));
if (!strcmp (str, "malloc"))
to = builtin_decl_explicit (BUILT_IN_TM_MALLOC);
else if (!strcmp (str, "calloc"))
to = builtin_decl_explicit (BUILT_IN_TM_CALLOC);
else if (!strcmp (str, "free"))
to = builtin_decl_explicit (BUILT_IN_TM_FREE);
else
return;
TREE_NOTHROW (to) = 0;
record_tm_replacement (from, to);
}
/* Diagnostics for tm_safe functions/regions. Called by the front end
once we've lowered the function to high-gimple. */
/* Subroutine of diagnose_tm_safe_errors, called through walk_gimple_seq.
Process exactly one statement. WI->INFO is set to non-null when in
the context of a tm_safe function, and null for a __transaction block. */
#define DIAG_TM_OUTER 1
#define DIAG_TM_SAFE 2
#define DIAG_TM_RELAXED 4
struct diagnose_tm
{
unsigned int summary_flags : 8;
unsigned int block_flags : 8;
unsigned int func_flags : 8;
unsigned int saw_volatile : 1;
gimple stmt;
};
/* Return true if T is a volatile variable of some kind. */
static bool
volatile_var_p (tree t)
{
return (SSA_VAR_P (t)
&& TREE_THIS_VOLATILE (TREE_TYPE (t)));
}
/* Tree callback function for diagnose_tm pass. */
static tree
diagnose_tm_1_op (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED,
void *data)
{
struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
struct diagnose_tm *d = (struct diagnose_tm *) wi->info;
if (volatile_var_p (*tp)
&& d->block_flags & DIAG_TM_SAFE
&& !d->saw_volatile)
{
d->saw_volatile = 1;
error_at (gimple_location (d->stmt),
"invalid volatile use of %qD inside transaction",
*tp);
}
return NULL_TREE;
}
static tree
diagnose_tm_1 (gimple_stmt_iterator *gsi, bool *handled_ops_p,
struct walk_stmt_info *wi)
{
gimple stmt = gsi_stmt (*gsi);
struct diagnose_tm *d = (struct diagnose_tm *) wi->info;
/* Save stmt for use in leaf analysis. */
d->stmt = stmt;
switch (gimple_code (stmt))
{
case GIMPLE_CALL:
{
tree fn = gimple_call_fn (stmt);
if ((d->summary_flags & DIAG_TM_OUTER) == 0
&& is_tm_may_cancel_outer (fn))
error_at (gimple_location (stmt),
"% function call not within"
" outer transaction or %");
if (d->summary_flags & DIAG_TM_SAFE)
{
bool is_safe, direct_call_p;
tree replacement;
if (TREE_CODE (fn) == ADDR_EXPR
&& TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL)
{
direct_call_p = true;
replacement = TREE_OPERAND (fn, 0);
replacement = find_tm_replacement_function (replacement);
if (replacement)
fn = replacement;
}
else
{
direct_call_p = false;
replacement = NULL_TREE;
}
if (is_tm_safe_or_pure (fn))
is_safe = true;
else if (is_tm_callable (fn) || is_tm_irrevocable (fn))
{
/* A function explicitly marked transaction_callable as
opposed to transaction_safe is being defined to be
unsafe as part of its ABI, regardless of its contents. */
is_safe = false;
}
else if (direct_call_p)
{
if (flags_from_decl_or_type (fn) & ECF_TM_BUILTIN)
is_safe = true;
else if (replacement)
{
/* ??? At present we've been considering replacements
merely transaction_callable, and therefore might
enter irrevocable. The tm_wrap attribute has not
yet made it into the new language spec. */
is_safe = false;
}
else
{
/* ??? Diagnostics for unmarked direct calls moved into
the IPA pass. Section 3.2 of the spec details how
functions not marked should be considered "implicitly
safe" based on having examined the function body. */
is_safe = true;
}
}
else
{
/* An unmarked indirect call. Consider it unsafe even
though optimization may yet figure out how to inline. */
is_safe = false;
}
if (!is_safe)
{
if (TREE_CODE (fn) == ADDR_EXPR)
fn = TREE_OPERAND (fn, 0);
if (d->block_flags & DIAG_TM_SAFE)
{
if (direct_call_p)
error_at (gimple_location (stmt),
"unsafe function call %qD within "
"atomic transaction", fn);
else
{
if (!DECL_P (fn) || DECL_NAME (fn))
error_at (gimple_location (stmt),
"unsafe function call %qE within "
"atomic transaction", fn);
else
error_at (gimple_location (stmt),
"unsafe indirect function call within "
"atomic transaction");
}
}
else
{
if (direct_call_p)
error_at (gimple_location (stmt),
"unsafe function call %qD within "
"% function", fn);
else
{
if (!DECL_P (fn) || DECL_NAME (fn))
error_at (gimple_location (stmt),
"unsafe function call %qE within "
"% function", fn);
else
error_at (gimple_location (stmt),
"unsafe indirect function call within "
"% function");
}
}
}
}
}
break;
case GIMPLE_ASM:
/* ??? We ought to come up with a way to add attributes to
asm statements, and then add "transaction_safe" to it.
Either that or get the language spec to resurrect __tm_waiver. */
if (d->block_flags & DIAG_TM_SAFE)
error_at (gimple_location (stmt),
"asm not allowed in atomic transaction");
else if (d->func_flags & DIAG_TM_SAFE)
error_at (gimple_location (stmt),
"asm not allowed in % function");
break;
case GIMPLE_TRANSACTION:
{
unsigned char inner_flags = DIAG_TM_SAFE;
if (gimple_transaction_subcode (stmt) & GTMA_IS_RELAXED)
{
if (d->block_flags & DIAG_TM_SAFE)
error_at (gimple_location (stmt),
"relaxed transaction in atomic transaction");
else if (d->func_flags & DIAG_TM_SAFE)
error_at (gimple_location (stmt),
"relaxed transaction in % function");
inner_flags = DIAG_TM_RELAXED;
}
else if (gimple_transaction_subcode (stmt) & GTMA_IS_OUTER)
{
if (d->block_flags)
error_at (gimple_location (stmt),
"outer transaction in transaction");
else if (d->func_flags & DIAG_TM_OUTER)
error_at (gimple_location (stmt),
"outer transaction in "
"% function");
else if (d->func_flags & DIAG_TM_SAFE)
error_at (gimple_location (stmt),
"outer transaction in % function");
inner_flags |= DIAG_TM_OUTER;
}
*handled_ops_p = true;
if (gimple_transaction_body (stmt))
{
struct walk_stmt_info wi_inner;
struct diagnose_tm d_inner;
memset (&d_inner, 0, sizeof (d_inner));
d_inner.func_flags = d->func_flags;
d_inner.block_flags = d->block_flags | inner_flags;
d_inner.summary_flags = d_inner.func_flags | d_inner.block_flags;
memset (&wi_inner, 0, sizeof (wi_inner));
wi_inner.info = &d_inner;
walk_gimple_seq (gimple_transaction_body (stmt),
diagnose_tm_1, diagnose_tm_1_op, &wi_inner);
}
}
break;
default:
break;
}
return NULL_TREE;
}
static unsigned int
diagnose_tm_blocks (void)
{
struct walk_stmt_info wi;
struct diagnose_tm d;
memset (&d, 0, sizeof (d));
if (is_tm_may_cancel_outer (current_function_decl))
d.func_flags = DIAG_TM_OUTER | DIAG_TM_SAFE;
else if (is_tm_safe (current_function_decl))
d.func_flags = DIAG_TM_SAFE;
d.summary_flags = d.func_flags;
memset (&wi, 0, sizeof (wi));
wi.info = &d;
walk_gimple_seq (gimple_body (current_function_decl),
diagnose_tm_1, diagnose_tm_1_op, &wi);
return 0;
}
struct gimple_opt_pass pass_diagnose_tm_blocks =
{
{
GIMPLE_PASS,
"*diagnose_tm_blocks", /* name */
OPTGROUP_NONE, /* optinfo_flags */
gate_tm, /* gate */
diagnose_tm_blocks, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TRANS_MEM, /* tv_id */
PROP_gimple_any, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
}
};
/* Instead of instrumenting thread private memory, we save the
addresses in a log which we later use to save/restore the addresses
upon transaction start/restart.
The log is keyed by address, where each element contains individual
statements among different code paths that perform the store.
This log is later used to generate either plain save/restore of the
addresses upon transaction start/restart, or calls to the ITM_L*
logging functions.
So for something like:
struct large { int x[1000]; };
struct large lala = { 0 };
__transaction {
lala.x[i] = 123;
...
}
We can either save/restore:
lala = { 0 };
trxn = _ITM_startTransaction ();
if (trxn & a_saveLiveVariables)
tmp_lala1 = lala.x[i];
else if (a & a_restoreLiveVariables)
lala.x[i] = tmp_lala1;
or use the logging functions:
lala = { 0 };
trxn = _ITM_startTransaction ();
_ITM_LU4 (&lala.x[i]);
Obviously, if we use _ITM_L* to log, we prefer to call _ITM_L* as
far up the dominator tree to shadow all of the writes to a given
location (thus reducing the total number of logging calls), but not
so high as to be called on a path that does not perform a
write. */
/* One individual log entry. We may have multiple statements for the
same location if neither dominate each other (on different
execution paths). */
typedef struct tm_log_entry
{
/* Address to save. */
tree addr;
/* Entry block for the transaction this address occurs in. */
basic_block entry_block;
/* Dominating statements the store occurs in. */
gimple_vec stmts;
/* Initially, while we are building the log, we place a nonzero
value here to mean that this address *will* be saved with a
save/restore sequence. Later, when generating the save sequence
we place the SSA temp generated here. */
tree save_var;
} *tm_log_entry_t;
/* Log entry hashtable helpers. */
struct log_entry_hasher
{
typedef tm_log_entry value_type;
typedef tm_log_entry compare_type;
static inline hashval_t hash (const value_type *);
static inline bool equal (const value_type *, const compare_type *);
static inline void remove (value_type *);
};
/* Htab support. Return hash value for a `tm_log_entry'. */
inline hashval_t
log_entry_hasher::hash (const value_type *log)
{
return iterative_hash_expr (log->addr, 0);
}
/* Htab support. Return true if two log entries are the same. */
inline bool
log_entry_hasher::equal (const value_type *log1, const compare_type *log2)
{
/* FIXME:
rth: I suggest that we get rid of the component refs etc.
I.e. resolve the reference to base + offset.
We may need to actually finish a merge with mainline for this,
since we'd like to be presented with Richi's MEM_REF_EXPRs more
often than not. But in the meantime your tm_log_entry could save
the results of get_inner_reference.
See: g++.dg/tm/pr46653.C
*/
/* Special case plain equality because operand_equal_p() below will
return FALSE if the addresses are equal but they have
side-effects (e.g. a volatile address). */
if (log1->addr == log2->addr)
return true;
return operand_equal_p (log1->addr, log2->addr, 0);
}
/* Htab support. Free one tm_log_entry. */
inline void
log_entry_hasher::remove (value_type *lp)
{
lp->stmts.release ();
free (lp);
}
/* The actual log. */
static hash_table tm_log;
/* Addresses to log with a save/restore sequence. These should be in
dominator order. */
static vec tm_log_save_addresses;
enum thread_memory_type
{
mem_non_local = 0,
mem_thread_local,
mem_transaction_local,
mem_max
};
typedef struct tm_new_mem_map
{
/* SSA_NAME being dereferenced. */
tree val;
enum thread_memory_type local_new_memory;
} tm_new_mem_map_t;
/* Hashtable helpers. */
struct tm_mem_map_hasher : typed_free_remove
{
typedef tm_new_mem_map_t value_type;
typedef tm_new_mem_map_t compare_type;
static inline hashval_t hash (const value_type *);
static inline bool equal (const value_type *, const compare_type *);
};
inline hashval_t
tm_mem_map_hasher::hash (const value_type *v)
{
return (intptr_t)v->val >> 4;
}
inline bool
tm_mem_map_hasher::equal (const value_type *v, const compare_type *c)
{
return v->val == c->val;
}
/* Map for an SSA_NAME originally pointing to a non aliased new piece
of memory (malloc, alloc, etc). */
static hash_table tm_new_mem_hash;
/* Initialize logging data structures. */
static void
tm_log_init (void)
{
tm_log.create (10);
tm_new_mem_hash.create (5);
tm_log_save_addresses.create (5);
}
/* Free logging data structures. */
static void
tm_log_delete (void)
{
tm_log.dispose ();
tm_new_mem_hash.dispose ();
tm_log_save_addresses.release ();
}
/* Return true if MEM is a transaction invariant memory for the TM
region starting at REGION_ENTRY_BLOCK. */
static bool
transaction_invariant_address_p (const_tree mem, basic_block region_entry_block)
{
if ((TREE_CODE (mem) == INDIRECT_REF || TREE_CODE (mem) == MEM_REF)
&& TREE_CODE (TREE_OPERAND (mem, 0)) == SSA_NAME)
{
basic_block def_bb;
def_bb = gimple_bb (SSA_NAME_DEF_STMT (TREE_OPERAND (mem, 0)));
return def_bb != region_entry_block
&& dominated_by_p (CDI_DOMINATORS, region_entry_block, def_bb);
}
mem = strip_invariant_refs (mem);
return mem && (CONSTANT_CLASS_P (mem) || decl_address_invariant_p (mem));
}
/* Given an address ADDR in STMT, find it in the memory log or add it,
making sure to keep only the addresses highest in the dominator
tree.
ENTRY_BLOCK is the entry_block for the transaction.
If we find the address in the log, make sure it's either the same
address, or an equivalent one that dominates ADDR.
If we find the address, but neither ADDR dominates the found
address, nor the found one dominates ADDR, we're on different
execution paths. Add it.
If known, ENTRY_BLOCK is the entry block for the region, otherwise
NULL. */
static void
tm_log_add (basic_block entry_block, tree addr, gimple stmt)
{
tm_log_entry **slot;
struct tm_log_entry l, *lp;
l.addr = addr;
slot = tm_log.find_slot (&l, INSERT);
if (!*slot)
{
tree type = TREE_TYPE (addr);
lp = XNEW (struct tm_log_entry);
lp->addr = addr;
*slot = lp;
/* Small invariant addresses can be handled as save/restores. */
if (entry_block
&& transaction_invariant_address_p (lp->addr, entry_block)
&& TYPE_SIZE_UNIT (type) != NULL
&& host_integerp (TYPE_SIZE_UNIT (type), 1)
&& (tree_low_cst (TYPE_SIZE_UNIT (type), 1)
< PARAM_VALUE (PARAM_TM_MAX_AGGREGATE_SIZE))
/* We must be able to copy this type normally. I.e., no
special constructors and the like. */
&& !TREE_ADDRESSABLE (type))
{
lp->save_var = create_tmp_reg (TREE_TYPE (lp->addr), "tm_save");
lp->stmts.create (0);
lp->entry_block = entry_block;
/* Save addresses separately in dominator order so we don't
get confused by overlapping addresses in the save/restore
sequence. */
tm_log_save_addresses.safe_push (lp->addr);
}
else
{
/* Use the logging functions. */
lp->stmts.create (5);
lp->stmts.quick_push (stmt);
lp->save_var = NULL;
}
}
else
{
size_t i;
gimple oldstmt;
lp = *slot;
/* If we're generating a save/restore sequence, we don't care
about statements. */
if (lp->save_var)
return;
for (i = 0; lp->stmts.iterate (i, &oldstmt); ++i)
{
if (stmt == oldstmt)
return;
/* We already have a store to the same address, higher up the
dominator tree. Nothing to do. */
if (dominated_by_p (CDI_DOMINATORS,
gimple_bb (stmt), gimple_bb (oldstmt)))
return;
/* We should be processing blocks in dominator tree order. */
gcc_assert (!dominated_by_p (CDI_DOMINATORS,
gimple_bb (oldstmt), gimple_bb (stmt)));
}
/* Store is on a different code path. */
lp->stmts.safe_push (stmt);
}
}
/* Gimplify the address of a TARGET_MEM_REF. Return the SSA_NAME
result, insert the new statements before GSI. */
static tree
gimplify_addr (gimple_stmt_iterator *gsi, tree x)
{
if (TREE_CODE (x) == TARGET_MEM_REF)
x = tree_mem_ref_addr (build_pointer_type (TREE_TYPE (x)), x);
else
x = build_fold_addr_expr (x);
return force_gimple_operand_gsi (gsi, x, true, NULL, true, GSI_SAME_STMT);
}
/* Instrument one address with the logging functions.
ADDR is the address to save.
STMT is the statement before which to place it. */
static void
tm_log_emit_stmt (tree addr, gimple stmt)
{
tree type = TREE_TYPE (addr);
tree size = TYPE_SIZE_UNIT (type);
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
gimple log;
enum built_in_function code = BUILT_IN_TM_LOG;
if (type == float_type_node)
code = BUILT_IN_TM_LOG_FLOAT;
else if (type == double_type_node)
code = BUILT_IN_TM_LOG_DOUBLE;
else if (type == long_double_type_node)
code = BUILT_IN_TM_LOG_LDOUBLE;
else if (host_integerp (size, 1))
{
unsigned int n = tree_low_cst (size, 1);
switch (n)
{
case 1:
code = BUILT_IN_TM_LOG_1;
break;
case 2:
code = BUILT_IN_TM_LOG_2;
break;
case 4:
code = BUILT_IN_TM_LOG_4;
break;
case 8:
code = BUILT_IN_TM_LOG_8;
break;
default:
code = BUILT_IN_TM_LOG;
if (TREE_CODE (type) == VECTOR_TYPE)
{
if (n == 8 && builtin_decl_explicit (BUILT_IN_TM_LOG_M64))
code = BUILT_IN_TM_LOG_M64;
else if (n == 16 && builtin_decl_explicit (BUILT_IN_TM_LOG_M128))
code = BUILT_IN_TM_LOG_M128;
else if (n == 32 && builtin_decl_explicit (BUILT_IN_TM_LOG_M256))
code = BUILT_IN_TM_LOG_M256;
}
break;
}
}
addr = gimplify_addr (&gsi, addr);
if (code == BUILT_IN_TM_LOG)
log = gimple_build_call (builtin_decl_explicit (code), 2, addr, size);
else
log = gimple_build_call (builtin_decl_explicit (code), 1, addr);
gsi_insert_before (&gsi, log, GSI_SAME_STMT);
}
/* Go through the log and instrument address that must be instrumented
with the logging functions. Leave the save/restore addresses for
later. */
static void
tm_log_emit (void)
{
hash_table ::iterator hi;
struct tm_log_entry *lp;
FOR_EACH_HASH_TABLE_ELEMENT (tm_log, lp, tm_log_entry_t, hi)
{
size_t i;
gimple stmt;
if (dump_file)
{
fprintf (dump_file, "TM thread private mem logging: ");
print_generic_expr (dump_file, lp->addr, 0);
fprintf (dump_file, "\n");
}
if (lp->save_var)
{
if (dump_file)
fprintf (dump_file, "DUMPING to variable\n");
continue;
}
else
{
if (dump_file)
fprintf (dump_file, "DUMPING with logging functions\n");
for (i = 0; lp->stmts.iterate (i, &stmt); ++i)
tm_log_emit_stmt (lp->addr, stmt);
}
}
}
/* Emit the save sequence for the corresponding addresses in the log.
ENTRY_BLOCK is the entry block for the transaction.
BB is the basic block to insert the code in. */
static void
tm_log_emit_saves (basic_block entry_block, basic_block bb)
{
size_t i;
gimple_stmt_iterator gsi = gsi_last_bb (bb);
gimple stmt;
struct tm_log_entry l, *lp;
for (i = 0; i < tm_log_save_addresses.length (); ++i)
{
l.addr = tm_log_save_addresses[i];
lp = *(tm_log.find_slot (&l, NO_INSERT));
gcc_assert (lp->save_var != NULL);
/* We only care about variables in the current transaction. */
if (lp->entry_block != entry_block)
continue;
stmt = gimple_build_assign (lp->save_var, unshare_expr (lp->addr));
/* Make sure we can create an SSA_NAME for this type. For
instance, aggregates aren't allowed, in which case the system
will create a VOP for us and everything will just work. */
if (is_gimple_reg_type (TREE_TYPE (lp->save_var)))
{
lp->save_var = make_ssa_name (lp->save_var, stmt);
gimple_assign_set_lhs (stmt, lp->save_var);
}
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
}
/* Emit the restore sequence for the corresponding addresses in the log.
ENTRY_BLOCK is the entry block for the transaction.
BB is the basic block to insert the code in. */
static void
tm_log_emit_restores (basic_block entry_block, basic_block bb)
{
int i;
struct tm_log_entry l, *lp;
gimple_stmt_iterator gsi;
gimple stmt;
for (i = tm_log_save_addresses.length () - 1; i >= 0; i--)
{
l.addr = tm_log_save_addresses[i];
lp = *(tm_log.find_slot (&l, NO_INSERT));
gcc_assert (lp->save_var != NULL);
/* We only care about variables in the current transaction. */
if (lp->entry_block != entry_block)
continue;
/* Restores are in LIFO order from the saves in case we have
overlaps. */
gsi = gsi_start_bb (bb);
stmt = gimple_build_assign (unshare_expr (lp->addr), lp->save_var);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
}
static tree lower_sequence_tm (gimple_stmt_iterator *, bool *,
struct walk_stmt_info *);
static tree lower_sequence_no_tm (gimple_stmt_iterator *, bool *,
struct walk_stmt_info *);
/* Evaluate an address X being dereferenced and determine if it
originally points to a non aliased new chunk of memory (malloc,
alloca, etc).
Return MEM_THREAD_LOCAL if it points to a thread-local address.
Return MEM_TRANSACTION_LOCAL if it points to a transaction-local address.
Return MEM_NON_LOCAL otherwise.
ENTRY_BLOCK is the entry block to the transaction containing the
dereference of X. */
static enum thread_memory_type
thread_private_new_memory (basic_block entry_block, tree x)
{
gimple stmt = NULL;
enum tree_code code;
tm_new_mem_map_t **slot;
tm_new_mem_map_t elt, *elt_p;
tree val = x;
enum thread_memory_type retval = mem_transaction_local;
if (!entry_block
|| TREE_CODE (x) != SSA_NAME
/* Possible uninitialized use, or a function argument. In
either case, we don't care. */
|| SSA_NAME_IS_DEFAULT_DEF (x))
return mem_non_local;
/* Look in cache first. */
elt.val = x;
slot = tm_new_mem_hash.find_slot (&elt, INSERT);
elt_p = *slot;
if (elt_p)
return elt_p->local_new_memory;
/* Optimistically assume the memory is transaction local during
processing. This catches recursion into this variable. */
*slot = elt_p = XNEW (tm_new_mem_map_t);
elt_p->val = val;
elt_p->local_new_memory = mem_transaction_local;
/* Search DEF chain to find the original definition of this address. */
do
{
if (ptr_deref_may_alias_global_p (x))
{
/* Address escapes. This is not thread-private. */
retval = mem_non_local;
goto new_memory_ret;
}
stmt = SSA_NAME_DEF_STMT (x);
/* If the malloc call is outside the transaction, this is
thread-local. */
if (retval != mem_thread_local
&& !dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt), entry_block))
retval = mem_thread_local;
if (is_gimple_assign (stmt))
{
code = gimple_assign_rhs_code (stmt);
/* x = foo ==> foo */
if (code == SSA_NAME)
x = gimple_assign_rhs1 (stmt);
/* x = foo + n ==> foo */
else if (code == POINTER_PLUS_EXPR)
x = gimple_assign_rhs1 (stmt);
/* x = (cast*) foo ==> foo */
else if (code == VIEW_CONVERT_EXPR || code == NOP_EXPR)
x = gimple_assign_rhs1 (stmt);
/* x = c ? op1 : op2 == > op1 or op2 just like a PHI */
else if (code == COND_EXPR)
{
tree op1 = gimple_assign_rhs2 (stmt);
tree op2 = gimple_assign_rhs3 (stmt);
enum thread_memory_type mem;
retval = thread_private_new_memory (entry_block, op1);
if (retval == mem_non_local)
goto new_memory_ret;
mem = thread_private_new_memory (entry_block, op2);
retval = MIN (retval, mem);
goto new_memory_ret;
}
else
{
retval = mem_non_local;
goto new_memory_ret;
}
}
else
{
if (gimple_code (stmt) == GIMPLE_PHI)
{
unsigned int i;
enum thread_memory_type mem;
tree phi_result = gimple_phi_result (stmt);
/* If any of the ancestors are non-local, we are sure to
be non-local. Otherwise we can avoid doing anything
and inherit what has already been generated. */
retval = mem_max;
for (i = 0; i < gimple_phi_num_args (stmt); ++i)
{
tree op = PHI_ARG_DEF (stmt, i);
/* Exclude self-assignment. */
if (phi_result == op)
continue;
mem = thread_private_new_memory (entry_block, op);
if (mem == mem_non_local)
{
retval = mem;
goto new_memory_ret;
}
retval = MIN (retval, mem);
}
goto new_memory_ret;
}
break;
}
}
while (TREE_CODE (x) == SSA_NAME);
if (stmt && is_gimple_call (stmt) && gimple_call_flags (stmt) & ECF_MALLOC)
/* Thread-local or transaction-local. */
;
else
retval = mem_non_local;
new_memory_ret:
elt_p->local_new_memory = retval;
return retval;
}
/* Determine whether X has to be instrumented using a read
or write barrier.
ENTRY_BLOCK is the entry block for the region where stmt resides
in. NULL if unknown.
STMT is the statement in which X occurs in. It is used for thread
private memory instrumentation. If no TPM instrumentation is
desired, STMT should be null. */
static bool
requires_barrier (basic_block entry_block, tree x, gimple stmt)
{
tree orig = x;
while (handled_component_p (x))
x = TREE_OPERAND (x, 0);
switch (TREE_CODE (x))
{
case INDIRECT_REF:
case MEM_REF:
{
enum thread_memory_type ret;
ret = thread_private_new_memory (entry_block, TREE_OPERAND (x, 0));
if (ret == mem_non_local)
return true;
if (stmt && ret == mem_thread_local)
/* ?? Should we pass `orig', or the INDIRECT_REF X. ?? */
tm_log_add (entry_block, orig, stmt);
/* Transaction-locals require nothing at all. For malloc, a
transaction restart frees the memory and we reallocate.
For alloca, the stack pointer gets reset by the retry and
we reallocate. */
return false;
}
case TARGET_MEM_REF:
if (TREE_CODE (TMR_BASE (x)) != ADDR_EXPR)
return true;
x = TREE_OPERAND (TMR_BASE (x), 0);
if (TREE_CODE (x) == PARM_DECL)
return false;
gcc_assert (TREE_CODE (x) == VAR_DECL);
/* FALLTHRU */
case PARM_DECL:
case RESULT_DECL:
case VAR_DECL:
if (DECL_BY_REFERENCE (x))
{
/* ??? This value is a pointer, but aggregate_value_p has been
jigged to return true which confuses needs_to_live_in_memory.
This ought to be cleaned up generically.
FIXME: Verify this still happens after the next mainline
merge. Testcase ie g++.dg/tm/pr47554.C.
*/
return false;
}
if (is_global_var (x))
return !TREE_READONLY (x);
if (/* FIXME: This condition should actually go below in the
tm_log_add() call, however is_call_clobbered() depends on
aliasing info which is not available during
gimplification. Since requires_barrier() gets called
during lower_sequence_tm/gimplification, leave the call
to needs_to_live_in_memory until we eliminate
lower_sequence_tm altogether. */
needs_to_live_in_memory (x))
return true;
else
{
/* For local memory that doesn't escape (aka thread private
memory), we can either save the value at the beginning of
the transaction and restore on restart, or call a tm
function to dynamically save and restore on restart
(ITM_L*). */
if (stmt)
tm_log_add (entry_block, orig, stmt);
return false;
}
default:
return false;
}
}
/* Mark the GIMPLE_ASSIGN statement as appropriate for being inside
a transaction region. */
static void
examine_assign_tm (unsigned *state, gimple_stmt_iterator *gsi)
{
gimple stmt = gsi_stmt (*gsi);
if (requires_barrier (/*entry_block=*/NULL, gimple_assign_rhs1 (stmt), NULL))
*state |= GTMA_HAVE_LOAD;
if (requires_barrier (/*entry_block=*/NULL, gimple_assign_lhs (stmt), NULL))
*state |= GTMA_HAVE_STORE;
}
/* Mark a GIMPLE_CALL as appropriate for being inside a transaction. */
static void
examine_call_tm (unsigned *state, gimple_stmt_iterator *gsi)
{
gimple stmt = gsi_stmt (*gsi);
tree fn;
if (is_tm_pure_call (stmt))
return;
/* Check if this call is a transaction abort. */
fn = gimple_call_fndecl (stmt);
if (is_tm_abort (fn))
*state |= GTMA_HAVE_ABORT;
/* Note that something may happen. */
*state |= GTMA_HAVE_LOAD | GTMA_HAVE_STORE;
}
/* Lower a GIMPLE_TRANSACTION statement. */
static void
lower_transaction (gimple_stmt_iterator *gsi, struct walk_stmt_info *wi)
{
gimple g, stmt = gsi_stmt (*gsi);
unsigned int *outer_state = (unsigned int *) wi->info;
unsigned int this_state = 0;
struct walk_stmt_info this_wi;
/* First, lower the body. The scanning that we do inside gives
us some idea of what we're dealing with. */
memset (&this_wi, 0, sizeof (this_wi));
this_wi.info = (void *) &this_state;
walk_gimple_seq_mod (gimple_transaction_body_ptr (stmt),
lower_sequence_tm, NULL, &this_wi);
/* If there was absolutely nothing transaction related inside the
transaction, we may elide it. Likewise if this is a nested
transaction and does not contain an abort. */
if (this_state == 0
|| (!(this_state & GTMA_HAVE_ABORT) && outer_state != NULL))
{
if (outer_state)
*outer_state |= this_state;
gsi_insert_seq_before (gsi, gimple_transaction_body (stmt),
GSI_SAME_STMT);
gimple_transaction_set_body (stmt, NULL);
gsi_remove (gsi, true);
wi->removed_stmt = true;
return;
}
/* Wrap the body of the transaction in a try-finally node so that
the commit call is always properly called. */
g = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_COMMIT), 0);
if (flag_exceptions)
{
tree ptr;
gimple_seq n_seq, e_seq;
n_seq = gimple_seq_alloc_with_stmt (g);
e_seq = NULL;
g = gimple_build_call (builtin_decl_explicit (BUILT_IN_EH_POINTER),
1, integer_zero_node);
ptr = create_tmp_var (ptr_type_node, NULL);
gimple_call_set_lhs (g, ptr);
gimple_seq_add_stmt (&e_seq, g);
g = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_COMMIT_EH),
1, ptr);
gimple_seq_add_stmt (&e_seq, g);
g = gimple_build_eh_else (n_seq, e_seq);
}
g = gimple_build_try (gimple_transaction_body (stmt),
gimple_seq_alloc_with_stmt (g), GIMPLE_TRY_FINALLY);
gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
gimple_transaction_set_body (stmt, NULL);
/* If the transaction calls abort or if this is an outer transaction,
add an "over" label afterwards. */
if ((this_state & (GTMA_HAVE_ABORT))
|| (gimple_transaction_subcode(stmt) & GTMA_IS_OUTER))
{
tree label = create_artificial_label (UNKNOWN_LOCATION);
gimple_transaction_set_label (stmt, label);
gsi_insert_after (gsi, gimple_build_label (label), GSI_CONTINUE_LINKING);
}
/* Record the set of operations found for use later. */
this_state |= gimple_transaction_subcode (stmt) & GTMA_DECLARATION_MASK;
gimple_transaction_set_subcode (stmt, this_state);
}
/* Iterate through the statements in the sequence, lowering them all
as appropriate for being in a transaction. */
static tree
lower_sequence_tm (gimple_stmt_iterator *gsi, bool *handled_ops_p,
struct walk_stmt_info *wi)
{
unsigned int *state = (unsigned int *) wi->info;
gimple stmt = gsi_stmt (*gsi);
*handled_ops_p = true;
switch (gimple_code (stmt))
{
case GIMPLE_ASSIGN:
/* Only memory reads/writes need to be instrumented. */
if (gimple_assign_single_p (stmt))
examine_assign_tm (state, gsi);
break;
case GIMPLE_CALL:
examine_call_tm (state, gsi);
break;
case GIMPLE_ASM:
*state |= GTMA_MAY_ENTER_IRREVOCABLE;
break;
case GIMPLE_TRANSACTION:
lower_transaction (gsi, wi);
break;
default:
*handled_ops_p = !gimple_has_substatements (stmt);
break;
}
return NULL_TREE;
}
/* Iterate through the statements in the sequence, lowering them all
as appropriate for being outside of a transaction. */
static tree
lower_sequence_no_tm (gimple_stmt_iterator *gsi, bool *handled_ops_p,
struct walk_stmt_info * wi)
{
gimple stmt = gsi_stmt (*gsi);
if (gimple_code (stmt) == GIMPLE_TRANSACTION)
{
*handled_ops_p = true;
lower_transaction (gsi, wi);
}
else
*handled_ops_p = !gimple_has_substatements (stmt);
return NULL_TREE;
}
/* Main entry point for flattening GIMPLE_TRANSACTION constructs. After
this, GIMPLE_TRANSACTION nodes still exist, but the nested body has
been moved out, and all the data required for constructing a proper
CFG has been recorded. */
static unsigned int
execute_lower_tm (void)
{
struct walk_stmt_info wi;
gimple_seq body;
/* Transactional clones aren't created until a later pass. */
gcc_assert (!decl_is_tm_clone (current_function_decl));
body = gimple_body (current_function_decl);
memset (&wi, 0, sizeof (wi));
walk_gimple_seq_mod (&body, lower_sequence_no_tm, NULL, &wi);
gimple_set_body (current_function_decl, body);
return 0;
}
struct gimple_opt_pass pass_lower_tm =
{
{
GIMPLE_PASS,
"tmlower", /* name */
OPTGROUP_NONE, /* optinfo_flags */
gate_tm, /* gate */
execute_lower_tm, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TRANS_MEM, /* tv_id */
PROP_gimple_lcf, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
}
};
/* Collect region information for each transaction. */
struct tm_region
{
/* Link to the next unnested transaction. */
struct tm_region *next;
/* Link to the next inner transaction. */
struct tm_region *inner;
/* Link to the next outer transaction. */
struct tm_region *outer;
/* The GIMPLE_TRANSACTION statement beginning this transaction.
After TM_MARK, this gets replaced by a call to
BUILT_IN_TM_START. */
gimple transaction_stmt;
/* After TM_MARK expands the GIMPLE_TRANSACTION into a call to
BUILT_IN_TM_START, this field is true if the transaction is an
outer transaction. */
bool original_transaction_was_outer;
/* Return value from BUILT_IN_TM_START. */
tree tm_state;
/* The entry block to this region. This will always be the first
block of the body of the transaction. */
basic_block entry_block;
/* The first block after an expanded call to _ITM_beginTransaction. */
basic_block restart_block;
/* The set of all blocks that end the region; NULL if only EXIT_BLOCK.
These blocks are still a part of the region (i.e., the border is
inclusive). Note that this set is only complete for paths in the CFG
starting at ENTRY_BLOCK, and that there is no exit block recorded for
the edge to the "over" label. */
bitmap exit_blocks;
/* The set of all blocks that have an TM_IRREVOCABLE call. */
bitmap irr_blocks;
};
typedef struct tm_region *tm_region_p;
/* True if there are pending edge statements to be committed for the
current function being scanned in the tmmark pass. */
bool pending_edge_inserts_p;
static struct tm_region *all_tm_regions;
static bitmap_obstack tm_obstack;
/* A subroutine of tm_region_init. Record the existence of the
GIMPLE_TRANSACTION statement in a tree of tm_region elements. */
static struct tm_region *
tm_region_init_0 (struct tm_region *outer, basic_block bb, gimple stmt)
{
struct tm_region *region;
region = (struct tm_region *)
obstack_alloc (&tm_obstack.obstack, sizeof (struct tm_region));
if (outer)
{
region->next = outer->inner;
outer->inner = region;
}
else
{
region->next = all_tm_regions;
all_tm_regions = region;
}
region->inner = NULL;
region->outer = outer;
region->transaction_stmt = stmt;
region->original_transaction_was_outer = false;
region->tm_state = NULL;
/* There are either one or two edges out of the block containing
the GIMPLE_TRANSACTION, one to the actual region and one to the
"over" label if the region contains an abort. The former will
always be the one marked FALLTHRU. */
region->entry_block = FALLTHRU_EDGE (bb)->dest;
region->exit_blocks = BITMAP_ALLOC (&tm_obstack);
region->irr_blocks = BITMAP_ALLOC (&tm_obstack);
return region;
}
/* A subroutine of tm_region_init. Record all the exit and
irrevocable blocks in BB into the region's exit_blocks and
irr_blocks bitmaps. Returns the new region being scanned. */
static struct tm_region *
tm_region_init_1 (struct tm_region *region, basic_block bb)
{
gimple_stmt_iterator gsi;
gimple g;
if (!region
|| (!region->irr_blocks && !region->exit_blocks))
return region;
/* Check to see if this is the end of a region by seeing if it
contains a call to __builtin_tm_commit{,_eh}. Note that the
outermost region for DECL_IS_TM_CLONE need not collect this. */
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
{
g = gsi_stmt (gsi);
if (gimple_code (g) == GIMPLE_CALL)
{
tree fn = gimple_call_fndecl (g);
if (fn && DECL_BUILT_IN_CLASS (fn) == BUILT_IN_NORMAL)
{
if ((DECL_FUNCTION_CODE (fn) == BUILT_IN_TM_COMMIT
|| DECL_FUNCTION_CODE (fn) == BUILT_IN_TM_COMMIT_EH)
&& region->exit_blocks)
{
bitmap_set_bit (region->exit_blocks, bb->index);
region = region->outer;
break;
}
if (DECL_FUNCTION_CODE (fn) == BUILT_IN_TM_IRREVOCABLE)
bitmap_set_bit (region->irr_blocks, bb->index);
}
}
}
return region;
}
/* Collect all of the transaction regions within the current function
and record them in ALL_TM_REGIONS. The REGION parameter may specify
an "outermost" region for use by tm clones. */
static void
tm_region_init (struct tm_region *region)
{
gimple g;
edge_iterator ei;
edge e;
basic_block bb;
vec queue = vNULL;
bitmap visited_blocks = BITMAP_ALLOC (NULL);
struct tm_region *old_region;
vec bb_regions = vNULL;
all_tm_regions = region;
bb = single_succ (ENTRY_BLOCK_PTR);
/* We could store this information in bb->aux, but we may get called
through get_all_tm_blocks() from another pass that may be already
using bb->aux. */
bb_regions.safe_grow_cleared (last_basic_block);
queue.safe_push (bb);
bb_regions[bb->index] = region;
do
{
bb = queue.pop ();
region = bb_regions[bb->index];
bb_regions[bb->index] = NULL;
/* Record exit and irrevocable blocks. */
region = tm_region_init_1 (region, bb);
/* Check for the last statement in the block beginning a new region. */
g = last_stmt (bb);
old_region = region;
if (g && gimple_code (g) == GIMPLE_TRANSACTION)
region = tm_region_init_0 (region, bb, g);
/* Process subsequent blocks. */
FOR_EACH_EDGE (e, ei, bb->succs)
if (!bitmap_bit_p (visited_blocks, e->dest->index))
{
bitmap_set_bit (visited_blocks, e->dest->index);
queue.safe_push (e->dest);
/* If the current block started a new region, make sure that only
the entry block of the new region is associated with this region.
Other successors are still part of the old region. */
if (old_region != region && e->dest != region->entry_block)
bb_regions[e->dest->index] = old_region;
else
bb_regions[e->dest->index] = region;
}
}
while (!queue.is_empty ());
queue.release ();
BITMAP_FREE (visited_blocks);
bb_regions.release ();
}
/* The "gate" function for all transactional memory expansion and optimization
passes. We collect region information for each top-level transaction, and
if we don't find any, we skip all of the TM passes. Each region will have
all of the exit blocks recorded, and the originating statement. */
static bool
gate_tm_init (void)
{
if (!flag_tm)
return false;
calculate_dominance_info (CDI_DOMINATORS);
bitmap_obstack_initialize (&tm_obstack);
/* If the function is a TM_CLONE, then the entire function is the region. */
if (decl_is_tm_clone (current_function_decl))
{
struct tm_region *region = (struct tm_region *)
obstack_alloc (&tm_obstack.obstack, sizeof (struct tm_region));
memset (region, 0, sizeof (*region));
region->entry_block = single_succ (ENTRY_BLOCK_PTR);
/* For a clone, the entire function is the region. But even if
we don't need to record any exit blocks, we may need to
record irrevocable blocks. */
region->irr_blocks = BITMAP_ALLOC (&tm_obstack);
tm_region_init (region);
}
else
{
tm_region_init (NULL);
/* If we didn't find any regions, cleanup and skip the whole tree
of tm-related optimizations. */
if (all_tm_regions == NULL)
{
bitmap_obstack_release (&tm_obstack);
return false;
}
}
return true;
}
struct gimple_opt_pass pass_tm_init =
{
{
GIMPLE_PASS,
"*tminit", /* name */
OPTGROUP_NONE, /* optinfo_flags */
gate_tm_init, /* gate */
NULL, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TRANS_MEM, /* tv_id */
PROP_ssa | PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
}
};
/* Add FLAGS to the GIMPLE_TRANSACTION subcode for the transaction region
represented by STATE. */
static inline void
transaction_subcode_ior (struct tm_region *region, unsigned flags)
{
if (region && region->transaction_stmt)
{
flags |= gimple_transaction_subcode (region->transaction_stmt);
gimple_transaction_set_subcode (region->transaction_stmt, flags);
}
}
/* Construct a memory load in a transactional context. Return the
gimple statement performing the load, or NULL if there is no
TM_LOAD builtin of the appropriate size to do the load.
LOC is the location to use for the new statement(s). */
static gimple
build_tm_load (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator *gsi)
{
enum built_in_function code = END_BUILTINS;
tree t, type = TREE_TYPE (rhs), decl;
gimple gcall;
if (type == float_type_node)
code = BUILT_IN_TM_LOAD_FLOAT;
else if (type == double_type_node)
code = BUILT_IN_TM_LOAD_DOUBLE;
else if (type == long_double_type_node)
code = BUILT_IN_TM_LOAD_LDOUBLE;
else if (TYPE_SIZE_UNIT (type) != NULL
&& host_integerp (TYPE_SIZE_UNIT (type), 1))
{
switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1))
{
case 1:
code = BUILT_IN_TM_LOAD_1;
break;
case 2:
code = BUILT_IN_TM_LOAD_2;
break;
case 4:
code = BUILT_IN_TM_LOAD_4;
break;
case 8:
code = BUILT_IN_TM_LOAD_8;
break;
}
}
if (code == END_BUILTINS)
{
decl = targetm.vectorize.builtin_tm_load (type);
if (!decl)
return NULL;
}
else
decl = builtin_decl_explicit (code);
t = gimplify_addr (gsi, rhs);
gcall = gimple_build_call (decl, 1, t);
gimple_set_location (gcall, loc);
t = TREE_TYPE (TREE_TYPE (decl));
if (useless_type_conversion_p (type, t))
{
gimple_call_set_lhs (gcall, lhs);
gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
}
else
{
gimple g;
tree temp;
temp = create_tmp_reg (t, NULL);
gimple_call_set_lhs (gcall, temp);
gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
t = fold_build1 (VIEW_CONVERT_EXPR, type, temp);
g = gimple_build_assign (lhs, t);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
}
return gcall;
}
/* Similarly for storing TYPE in a transactional context. */
static gimple
build_tm_store (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator *gsi)
{
enum built_in_function code = END_BUILTINS;
tree t, fn, type = TREE_TYPE (rhs), simple_type;
gimple gcall;
if (type == float_type_node)
code = BUILT_IN_TM_STORE_FLOAT;
else if (type == double_type_node)
code = BUILT_IN_TM_STORE_DOUBLE;
else if (type == long_double_type_node)
code = BUILT_IN_TM_STORE_LDOUBLE;
else if (TYPE_SIZE_UNIT (type) != NULL
&& host_integerp (TYPE_SIZE_UNIT (type), 1))
{
switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1))
{
case 1:
code = BUILT_IN_TM_STORE_1;
break;
case 2:
code = BUILT_IN_TM_STORE_2;
break;
case 4:
code = BUILT_IN_TM_STORE_4;
break;
case 8:
code = BUILT_IN_TM_STORE_8;
break;
}
}
if (code == END_BUILTINS)
{
fn = targetm.vectorize.builtin_tm_store (type);
if (!fn)
return NULL;
}
else
fn = builtin_decl_explicit (code);
simple_type = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (fn))));
if (TREE_CODE (rhs) == CONSTRUCTOR)
{
/* Handle the easy initialization to zero. */
if (!CONSTRUCTOR_ELTS (rhs))
rhs = build_int_cst (simple_type, 0);
else
{
/* ...otherwise punt to the caller and probably use
BUILT_IN_TM_MEMMOVE, because we can't wrap a
VIEW_CONVERT_EXPR around a CONSTRUCTOR (below) and produce
valid gimple. */
return NULL;
}
}
else if (!useless_type_conversion_p (simple_type, type))
{
gimple g;
tree temp;
temp = create_tmp_reg (simple_type, NULL);
t = fold_build1 (VIEW_CONVERT_EXPR, simple_type, rhs);
g = gimple_build_assign (temp, t);
gimple_set_location (g, loc);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
rhs = temp;
}
t = gimplify_addr (gsi, lhs);
gcall = gimple_build_call (fn, 2, t, rhs);
gimple_set_location (gcall, loc);
gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
return gcall;
}
/* Expand an assignment statement into transactional builtins. */
static void
expand_assign_tm (struct tm_region *region, gimple_stmt_iterator *gsi)
{
gimple stmt = gsi_stmt (*gsi);
location_t loc = gimple_location (stmt);
tree lhs = gimple_assign_lhs (stmt);
tree rhs = gimple_assign_rhs1 (stmt);
bool store_p = requires_barrier (region->entry_block, lhs, NULL);
bool load_p = requires_barrier (region->entry_block, rhs, NULL);
gimple gcall = NULL;
if (!load_p && !store_p)
{
/* Add thread private addresses to log if applicable. */
requires_barrier (region->entry_block, lhs, stmt);
gsi_next (gsi);
return;
}
// Remove original load/store statement.
gsi_remove (gsi, true);
if (load_p && !store_p)
{
transaction_subcode_ior (region, GTMA_HAVE_LOAD);
gcall = build_tm_load (loc, lhs, rhs, gsi);
}
else if (store_p && !load_p)
{
transaction_subcode_ior (region, GTMA_HAVE_STORE);
gcall = build_tm_store (loc, lhs, rhs, gsi);
}
if (!gcall)
{
tree lhs_addr, rhs_addr, tmp;
if (load_p)
transaction_subcode_ior (region, GTMA_HAVE_LOAD);
if (store_p)
transaction_subcode_ior (region, GTMA_HAVE_STORE);
/* ??? Figure out if there's any possible overlap between the LHS
and the RHS and if not, use MEMCPY. */
if (load_p && is_gimple_reg (lhs))
{
tmp = create_tmp_var (TREE_TYPE (lhs), NULL);
lhs_addr = build_fold_addr_expr (tmp);
}
else
{
tmp = NULL_TREE;
lhs_addr = gimplify_addr (gsi, lhs);
}
rhs_addr = gimplify_addr (gsi, rhs);
gcall = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_MEMMOVE),
3, lhs_addr, rhs_addr,
TYPE_SIZE_UNIT (TREE_TYPE (lhs)));
gimple_set_location (gcall, loc);
gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
if (tmp)
{
gcall = gimple_build_assign (lhs, tmp);
gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
}
}
/* Now that we have the load/store in its instrumented form, add
thread private addresses to the log if applicable. */
if (!store_p)
requires_barrier (region->entry_block, lhs, gcall);
// The calls to build_tm_{store,load} above inserted the instrumented
// call into the stream.
// gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
}
/* Expand a call statement as appropriate for a transaction. That is,
either verify that the call does not affect the transaction, or
redirect the call to a clone that handles transactions, or change
the transaction state to IRREVOCABLE. Return true if the call is
one of the builtins that end a transaction. */
static bool
expand_call_tm (struct tm_region *region,
gimple_stmt_iterator *gsi)
{
gimple stmt = gsi_stmt (*gsi);
tree lhs = gimple_call_lhs (stmt);
tree fn_decl;
struct cgraph_node *node;
bool retval = false;
fn_decl = gimple_call_fndecl (stmt);
if (fn_decl == builtin_decl_explicit (BUILT_IN_TM_MEMCPY)
|| fn_decl == builtin_decl_explicit (BUILT_IN_TM_MEMMOVE))
transaction_subcode_ior (region, GTMA_HAVE_STORE | GTMA_HAVE_LOAD);
if (fn_decl == builtin_decl_explicit (BUILT_IN_TM_MEMSET))
transaction_subcode_ior (region, GTMA_HAVE_STORE);
if (is_tm_pure_call (stmt))
return false;
if (fn_decl)
retval = is_tm_ending_fndecl (fn_decl);
if (!retval)
{
/* Assume all non-const/pure calls write to memory, except
transaction ending builtins. */
transaction_subcode_ior (region, GTMA_HAVE_STORE);
}
/* For indirect calls, we already generated a call into the runtime. */
if (!fn_decl)
{
tree fn = gimple_call_fn (stmt);
/* We are guaranteed never to go irrevocable on a safe or pure
call, and the pure call was handled above. */
if (is_tm_safe (fn))
return false;
else
transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
return false;
}
node = cgraph_get_node (fn_decl);
/* All calls should have cgraph here. */
if (!node)
{
/* We can have a nodeless call here if some pass after IPA-tm
added uninstrumented calls. For example, loop distribution
can transform certain loop constructs into __builtin_mem*
calls. In this case, see if we have a suitable TM
replacement and fill in the gaps. */
gcc_assert (DECL_BUILT_IN_CLASS (fn_decl) == BUILT_IN_NORMAL);
enum built_in_function code = DECL_FUNCTION_CODE (fn_decl);
gcc_assert (code == BUILT_IN_MEMCPY
|| code == BUILT_IN_MEMMOVE
|| code == BUILT_IN_MEMSET);
tree repl = find_tm_replacement_function (fn_decl);
if (repl)
{
gimple_call_set_fndecl (stmt, repl);
update_stmt (stmt);
node = cgraph_create_node (repl);
node->local.tm_may_enter_irr = false;
return expand_call_tm (region, gsi);
}
gcc_unreachable ();
}
if (node->local.tm_may_enter_irr)
transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
if (is_tm_abort (fn_decl))
{
transaction_subcode_ior (region, GTMA_HAVE_ABORT);
return true;
}
/* Instrument the store if needed.
If the assignment happens inside the function call (return slot
optimization), there is no instrumentation to be done, since
the callee should have done the right thing. */
if (lhs && requires_barrier (region->entry_block, lhs, stmt)
&& !gimple_call_return_slot_opt_p (stmt))
{
tree tmp = create_tmp_reg (TREE_TYPE (lhs), NULL);
location_t loc = gimple_location (stmt);
edge fallthru_edge = NULL;
/* Remember if the call was going to throw. */
if (stmt_can_throw_internal (stmt))
{
edge_iterator ei;
edge e;
basic_block bb = gimple_bb (stmt);
FOR_EACH_EDGE (e, ei, bb->succs)
if (e->flags & EDGE_FALLTHRU)
{
fallthru_edge = e;
break;
}
}
gimple_call_set_lhs (stmt, tmp);
update_stmt (stmt);
stmt = gimple_build_assign (lhs, tmp);
gimple_set_location (stmt, loc);
/* We cannot throw in the middle of a BB. If the call was going
to throw, place the instrumentation on the fallthru edge, so
the call remains the last statement in the block. */
if (fallthru_edge)
{
gimple_seq fallthru_seq = gimple_seq_alloc_with_stmt (stmt);
gimple_stmt_iterator fallthru_gsi = gsi_start (fallthru_seq);
expand_assign_tm (region, &fallthru_gsi);
gsi_insert_seq_on_edge (fallthru_edge, fallthru_seq);
pending_edge_inserts_p = true;
}
else
{
gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
expand_assign_tm (region, gsi);
}
transaction_subcode_ior (region, GTMA_HAVE_STORE);
}
return retval;
}
/* Expand all statements in BB as appropriate for being inside
a transaction. */
static void
expand_block_tm (struct tm_region *region, basic_block bb)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
{
gimple stmt = gsi_stmt (gsi);
switch (gimple_code (stmt))
{
case GIMPLE_ASSIGN:
/* Only memory reads/writes need to be instrumented. */
if (gimple_assign_single_p (stmt)
&& !gimple_clobber_p (stmt))
{
expand_assign_tm (region, &gsi);
continue;
}
break;
case GIMPLE_CALL:
if (expand_call_tm (region, &gsi))
return;
break;
case GIMPLE_ASM:
gcc_unreachable ();
default:
break;
}
if (!gsi_end_p (gsi))
gsi_next (&gsi);
}
}
/* Return the list of basic-blocks in REGION.
STOP_AT_IRREVOCABLE_P is true if caller is uninterested in blocks
following a TM_IRREVOCABLE call.
INCLUDE_UNINSTRUMENTED_P is TRUE if we should include the
uninstrumented code path blocks in the list of basic blocks
returned, false otherwise. */
static vec
get_tm_region_blocks (basic_block entry_block,
bitmap exit_blocks,
bitmap irr_blocks,
bitmap all_region_blocks,
bool stop_at_irrevocable_p,
bool include_uninstrumented_p = true)
{
vec bbs = vNULL;
unsigned i;
edge e;
edge_iterator ei;
bitmap visited_blocks = BITMAP_ALLOC (NULL);
i = 0;
bbs.safe_push (entry_block);
bitmap_set_bit (visited_blocks, entry_block->index);
do
{
basic_block bb = bbs[i++];
if (exit_blocks &&
bitmap_bit_p (exit_blocks, bb->index))
continue;
if (stop_at_irrevocable_p
&& irr_blocks
&& bitmap_bit_p (irr_blocks, bb->index))
continue;
FOR_EACH_EDGE (e, ei, bb->succs)
if ((include_uninstrumented_p
|| !(e->flags & EDGE_TM_UNINSTRUMENTED))
&& !bitmap_bit_p (visited_blocks, e->dest->index))
{
bitmap_set_bit (visited_blocks, e->dest->index);
bbs.safe_push (e->dest);
}
}
while (i < bbs.length ());
if (all_region_blocks)
bitmap_ior_into (all_region_blocks, visited_blocks);
BITMAP_FREE (visited_blocks);
return bbs;
}
// Callback data for collect_bb2reg.
struct bb2reg_stuff
{
vec *bb2reg;
bool include_uninstrumented_p;
};
// Callback for expand_regions, collect innermost region data for each bb.
static void *
collect_bb2reg (struct tm_region *region, void *data)
{
struct bb2reg_stuff *stuff = (struct bb2reg_stuff *)data;
vec *bb2reg = stuff->bb2reg;
vec queue;
unsigned int i;
basic_block bb;
queue = get_tm_region_blocks (region->entry_block,
region->exit_blocks,
region->irr_blocks,
NULL,
/*stop_at_irr_p=*/true,
stuff->include_uninstrumented_p);
// We expect expand_region to perform a post-order traversal of the region
// tree. Therefore the last region seen for any bb is the innermost.
FOR_EACH_VEC_ELT (queue, i, bb)
(*bb2reg)[bb->index] = region;
queue.release ();
return NULL;
}
// Returns a vector, indexed by BB->INDEX, of the innermost tm_region to
// which a basic block belongs. Note that we only consider the instrumented
// code paths for the region; the uninstrumented code paths are ignored if
// INCLUDE_UNINSTRUMENTED_P is false.
//
// ??? This data is very similar to the bb_regions array that is collected
// during tm_region_init. Or, rather, this data is similar to what could
// be used within tm_region_init. The actual computation in tm_region_init
// begins and ends with bb_regions entirely full of NULL pointers, due to
// the way in which pointers are swapped in and out of the array.
//
// ??? Our callers expect that blocks are not shared between transactions.
// When the optimizers get too smart, and blocks are shared, then during
// the tm_mark phase we'll add log entries to only one of the two transactions,
// and in the tm_edge phase we'll add edges to the CFG that create invalid
// cycles. The symptom being SSA defs that do not dominate their uses.
// Note that the optimizers were locally correct with their transformation,
// as we have no info within the program that suggests that the blocks cannot
// be shared.
//
// ??? There is currently a hack inside tree-ssa-pre.c to work around the
// only known instance of this block sharing.
static vec
get_bb_regions_instrumented (bool traverse_clones,
bool include_uninstrumented_p)
{
unsigned n = last_basic_block;
struct bb2reg_stuff stuff;
vec ret;
ret.create (n);
ret.safe_grow_cleared (n);
stuff.bb2reg = &ret;
stuff.include_uninstrumented_p = include_uninstrumented_p;
expand_regions (all_tm_regions, collect_bb2reg, &stuff, traverse_clones);
return ret;
}
/* Set the IN_TRANSACTION for all gimple statements that appear in a
transaction. */
void
compute_transaction_bits (void)
{
struct tm_region *region;
vec queue;
unsigned int i;
basic_block bb;
/* ?? Perhaps we need to abstract gate_tm_init further, because we
certainly don't need it to calculate CDI_DOMINATOR info. */
gate_tm_init ();
FOR_EACH_BB (bb)
bb->flags &= ~BB_IN_TRANSACTION;
for (region = all_tm_regions; region; region = region->next)
{
queue = get_tm_region_blocks (region->entry_block,
region->exit_blocks,
region->irr_blocks,
NULL,
/*stop_at_irr_p=*/true);
for (i = 0; queue.iterate (i, &bb); ++i)
bb->flags |= BB_IN_TRANSACTION;
queue.release ();
}
if (all_tm_regions)
bitmap_obstack_release (&tm_obstack);
}
/* Replace the GIMPLE_TRANSACTION in this region with the corresponding
call to BUILT_IN_TM_START. */
static void *
expand_transaction (struct tm_region *region, void *data ATTRIBUTE_UNUSED)
{
tree tm_start = builtin_decl_explicit (BUILT_IN_TM_START);
basic_block transaction_bb = gimple_bb (region->transaction_stmt);
tree tm_state = region->tm_state;
tree tm_state_type = TREE_TYPE (tm_state);
edge abort_edge = NULL;
edge inst_edge = NULL;
edge uninst_edge = NULL;
edge fallthru_edge = NULL;
// Identify the various successors of the transaction start.
{
edge_iterator i;
edge e;
FOR_EACH_EDGE (e, i, transaction_bb->succs)
{
if (e->flags & EDGE_TM_ABORT)
abort_edge = e;
else if (e->flags & EDGE_TM_UNINSTRUMENTED)
uninst_edge = e;
else
inst_edge = e;
if (e->flags & EDGE_FALLTHRU)
fallthru_edge = e;
}
}
/* ??? There are plenty of bits here we're not computing. */
{
int subcode = gimple_transaction_subcode (region->transaction_stmt);
int flags = 0;
if (subcode & GTMA_DOES_GO_IRREVOCABLE)
flags |= PR_DOESGOIRREVOCABLE;
if ((subcode & GTMA_MAY_ENTER_IRREVOCABLE) == 0)
flags |= PR_HASNOIRREVOCABLE;
/* If the transaction does not have an abort in lexical scope and is not
marked as an outer transaction, then it will never abort. */
if ((subcode & GTMA_HAVE_ABORT) == 0 && (subcode & GTMA_IS_OUTER) == 0)
flags |= PR_HASNOABORT;
if ((subcode & GTMA_HAVE_STORE) == 0)
flags |= PR_READONLY;
if (inst_edge && !(subcode & GTMA_HAS_NO_INSTRUMENTATION))
flags |= PR_INSTRUMENTEDCODE;
if (uninst_edge)
flags |= PR_UNINSTRUMENTEDCODE;
if (subcode & GTMA_IS_OUTER)
region->original_transaction_was_outer = true;
tree t = build_int_cst (tm_state_type, flags);
gimple call = gimple_build_call (tm_start, 1, t);
gimple_call_set_lhs (call, tm_state);
gimple_set_location (call, gimple_location (region->transaction_stmt));
// Replace the GIMPLE_TRANSACTION with the call to BUILT_IN_TM_START.
gimple_stmt_iterator gsi = gsi_last_bb (transaction_bb);
gcc_assert (gsi_stmt (gsi) == region->transaction_stmt);
gsi_insert_before (&gsi, call, GSI_SAME_STMT);
gsi_remove (&gsi, true);
region->transaction_stmt = call;
}
// Generate log saves.
if (!tm_log_save_addresses.is_empty ())
tm_log_emit_saves (region->entry_block, transaction_bb);
// In the beginning, we've no tests to perform on transaction restart.
// Note that after this point, transaction_bb becomes the "most recent
// block containing tests for the transaction".
region->restart_block = region->entry_block;
// Generate log restores.
if (!tm_log_save_addresses.is_empty ())
{
basic_block test_bb = create_empty_bb (transaction_bb);
basic_block code_bb = create_empty_bb (test_bb);
basic_block join_bb = create_empty_bb (code_bb);
if (current_loops && transaction_bb->loop_father)
{
add_bb_to_loop (test_bb, transaction_bb->loop_father);
add_bb_to_loop (code_bb, transaction_bb->loop_father);
add_bb_to_loop (join_bb, transaction_bb->loop_father);
}
if (region->restart_block == region->entry_block)
region->restart_block = test_bb;
tree t1 = create_tmp_reg (tm_state_type, NULL);
tree t2 = build_int_cst (tm_state_type, A_RESTORELIVEVARIABLES);
gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
tm_state, t2);
gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
t2 = build_int_cst (tm_state_type, 0);
stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
tm_log_emit_restores (region->entry_block, code_bb);
edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU);
edge et = make_edge (test_bb, code_bb, EDGE_TRUE_VALUE);
edge ef = make_edge (test_bb, join_bb, EDGE_FALSE_VALUE);
redirect_edge_pred (fallthru_edge, join_bb);
join_bb->frequency = test_bb->frequency = transaction_bb->frequency;
join_bb->count = test_bb->count = transaction_bb->count;
ei->probability = PROB_ALWAYS;
et->probability = PROB_LIKELY;
ef->probability = PROB_UNLIKELY;
et->count = apply_probability(test_bb->count, et->probability);
ef->count = apply_probability(test_bb->count, ef->probability);
code_bb->count = et->count;
code_bb->frequency = EDGE_FREQUENCY (et);
transaction_bb = join_bb;
}
// If we have an ABORT edge, create a test to perform the abort.
if (abort_edge)
{
basic_block test_bb = create_empty_bb (transaction_bb);
if (current_loops && transaction_bb->loop_father)
add_bb_to_loop (test_bb, transaction_bb->loop_father);
if (region->restart_block == region->entry_block)
region->restart_block = test_bb;
tree t1 = create_tmp_reg (tm_state_type, NULL);
tree t2 = build_int_cst (tm_state_type, A_ABORTTRANSACTION);
gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
tm_state, t2);
gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
t2 = build_int_cst (tm_state_type, 0);
stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU);
test_bb->frequency = transaction_bb->frequency;
test_bb->count = transaction_bb->count;
ei->probability = PROB_ALWAYS;
// Not abort edge. If both are live, chose one at random as we'll
// we'll be fixing that up below.
redirect_edge_pred (fallthru_edge, test_bb);
fallthru_edge->flags = EDGE_FALSE_VALUE;
fallthru_edge->probability = PROB_VERY_LIKELY;
fallthru_edge->count
= apply_probability(test_bb->count, fallthru_edge->probability);
// Abort/over edge.
redirect_edge_pred (abort_edge, test_bb);
abort_edge->flags = EDGE_TRUE_VALUE;
abort_edge->probability = PROB_VERY_UNLIKELY;
abort_edge->count
= apply_probability(test_bb->count, abort_edge->probability);
transaction_bb = test_bb;
}
// If we have both instrumented and uninstrumented code paths, select one.
if (inst_edge && uninst_edge)
{
basic_block test_bb = create_empty_bb (transaction_bb);
if (current_loops && transaction_bb->loop_father)
add_bb_to_loop (test_bb, transaction_bb->loop_father);
if (region->restart_block == region->entry_block)
region->restart_block = test_bb;
tree t1 = create_tmp_reg (tm_state_type, NULL);
tree t2 = build_int_cst (tm_state_type, A_RUNUNINSTRUMENTEDCODE);
gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
tm_state, t2);
gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
t2 = build_int_cst (tm_state_type, 0);
stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
// Create the edge into test_bb first, as we want to copy values
// out of the fallthru edge.
edge e = make_edge (transaction_bb, test_bb, fallthru_edge->flags);
e->probability = fallthru_edge->probability;
test_bb->count = e->count = fallthru_edge->count;
test_bb->frequency = EDGE_FREQUENCY (e);
// Now update the edges to the inst/uninist implementations.
// For now assume that the paths are equally likely. When using HTM,
// we'll try the uninst path first and fallback to inst path if htm
// buffers are exceeded. Without HTM we start with the inst path and
// use the uninst path when falling back to serial mode.
redirect_edge_pred (inst_edge, test_bb);
inst_edge->flags = EDGE_FALSE_VALUE;
inst_edge->probability = REG_BR_PROB_BASE / 2;
inst_edge->count
= apply_probability(test_bb->count, inst_edge->probability);
redirect_edge_pred (uninst_edge, test_bb);
uninst_edge->flags = EDGE_TRUE_VALUE;
uninst_edge->probability = REG_BR_PROB_BASE / 2;
uninst_edge->count
= apply_probability(test_bb->count, uninst_edge->probability);
}
// If we have no previous special cases, and we have PHIs at the beginning
// of the atomic region, this means we have a loop at the beginning of the
// atomic region that shares the first block. This can cause problems with
// the transaction restart abnormal edges to be added in the tm_edges pass.
// Solve this by adding a new empty block to receive the abnormal edges.
if (region->restart_block == region->entry_block
&& phi_nodes (region->entry_block))
{
basic_block empty_bb = create_empty_bb (transaction_bb);
region->restart_block = empty_bb;
if (current_loops && transaction_bb->loop_father)
add_bb_to_loop (empty_bb, transaction_bb->loop_father);
redirect_edge_pred (fallthru_edge, empty_bb);
make_edge (transaction_bb, empty_bb, EDGE_FALLTHRU);
}
return NULL;
}
/* Generate the temporary to be used for the return value of
BUILT_IN_TM_START. */
static void *
generate_tm_state (struct tm_region *region, void *data ATTRIBUTE_UNUSED)
{
tree tm_start = builtin_decl_explicit (BUILT_IN_TM_START);
region->tm_state =
create_tmp_reg (TREE_TYPE (TREE_TYPE (tm_start)), "tm_state");
// Reset the subcode, post optimizations. We'll fill this in
// again as we process blocks.
if (region->exit_blocks)
{
unsigned int subcode
= gimple_transaction_subcode (region->transaction_stmt);
if (subcode & GTMA_DOES_GO_IRREVOCABLE)
subcode &= (GTMA_DECLARATION_MASK | GTMA_DOES_GO_IRREVOCABLE
| GTMA_MAY_ENTER_IRREVOCABLE
| GTMA_HAS_NO_INSTRUMENTATION);
else
subcode &= GTMA_DECLARATION_MASK;
gimple_transaction_set_subcode (region->transaction_stmt, subcode);
}
return NULL;
}
// Propagate flags from inner transactions outwards.
static void
propagate_tm_flags_out (struct tm_region *region)
{
if (region == NULL)
return;
propagate_tm_flags_out (region->inner);
if (region->outer && region->outer->transaction_stmt)
{
unsigned s = gimple_transaction_subcode (region->transaction_stmt);
s &= (GTMA_HAVE_ABORT | GTMA_HAVE_LOAD | GTMA_HAVE_STORE
| GTMA_MAY_ENTER_IRREVOCABLE);
s |= gimple_transaction_subcode (region->outer->transaction_stmt);
gimple_transaction_set_subcode (region->outer->transaction_stmt, s);
}
propagate_tm_flags_out (region->next);
}
/* Entry point to the MARK phase of TM expansion. Here we replace
transactional memory statements with calls to builtins, and function
calls with their transactional clones (if available). But we don't
yet lower GIMPLE_TRANSACTION or add the transaction restart back-edges. */
static unsigned int
execute_tm_mark (void)
{
pending_edge_inserts_p = false;
expand_regions (all_tm_regions, generate_tm_state, NULL,
/*traverse_clones=*/true);
tm_log_init ();
vec bb_regions
= get_bb_regions_instrumented (/*traverse_clones=*/true,
/*include_uninstrumented_p=*/false);
struct tm_region *r;
unsigned i;
// Expand memory operations into calls into the runtime.
// This collects log entries as well.
FOR_EACH_VEC_ELT (bb_regions, i, r)
{
if (r != NULL)
{
if (r->transaction_stmt)
{
unsigned sub = gimple_transaction_subcode (r->transaction_stmt);
/* If we're sure to go irrevocable, there won't be
anything to expand, since the run-time will go
irrevocable right away. */
if (sub & GTMA_DOES_GO_IRREVOCABLE
&& sub & GTMA_MAY_ENTER_IRREVOCABLE)
continue;
}
expand_block_tm (r, BASIC_BLOCK (i));
}
}
bb_regions.release ();
// Propagate flags from inner transactions outwards.
propagate_tm_flags_out (all_tm_regions);
// Expand GIMPLE_TRANSACTIONs into calls into the runtime.
expand_regions (all_tm_regions, expand_transaction, NULL,
/*traverse_clones=*/false);
tm_log_emit ();
tm_log_delete ();
if (pending_edge_inserts_p)
gsi_commit_edge_inserts ();
free_dominance_info (CDI_DOMINATORS);
return 0;
}
struct gimple_opt_pass pass_tm_mark =
{
{
GIMPLE_PASS,
"tmmark", /* name */
OPTGROUP_NONE, /* optinfo_flags */
NULL, /* gate */
execute_tm_mark, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TRANS_MEM, /* tv_id */
PROP_ssa | PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_update_ssa
| TODO_verify_ssa, /* todo_flags_finish */
}
};
/* Create an abnormal edge from STMT at iter, splitting the block
as necessary. Adjust *PNEXT as needed for the split block. */
static inline void
split_bb_make_tm_edge (gimple stmt, basic_block dest_bb,
gimple_stmt_iterator iter, gimple_stmt_iterator *pnext)
{
basic_block bb = gimple_bb (stmt);
if (!gsi_one_before_end_p (iter))
{
edge e = split_block (bb, stmt);
*pnext = gsi_start_bb (e->dest);
}
make_edge (bb, dest_bb, EDGE_ABNORMAL);
// Record the need for the edge for the benefit of the rtl passes.
if (cfun->gimple_df->tm_restart == NULL)
cfun->gimple_df->tm_restart = htab_create_ggc (31, struct_ptr_hash,
struct_ptr_eq, ggc_free);
struct tm_restart_node dummy;
dummy.stmt = stmt;
dummy.label_or_list = gimple_block_label (dest_bb);
void **slot = htab_find_slot (cfun->gimple_df->tm_restart, &dummy, INSERT);
struct tm_restart_node *n = (struct tm_restart_node *) *slot;
if (n == NULL)
{
n = ggc_alloc_tm_restart_node ();
*n = dummy;
}
else
{
tree old = n->label_or_list;
if (TREE_CODE (old) == LABEL_DECL)
old = tree_cons (NULL, old, NULL);
n->label_or_list = tree_cons (NULL, dummy.label_or_list, old);
}
}
/* Split block BB as necessary for every builtin function we added, and
wire up the abnormal back edges implied by the transaction restart. */
static void
expand_block_edges (struct tm_region *const region, basic_block bb)
{
gimple_stmt_iterator gsi, next_gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi = next_gsi)
{
gimple stmt = gsi_stmt (gsi);
next_gsi = gsi;
gsi_next (&next_gsi);
// ??? Shouldn't we split for any non-pure, non-irrevocable function?
if (gimple_code (stmt) != GIMPLE_CALL
|| (gimple_call_flags (stmt) & ECF_TM_BUILTIN) == 0)
continue;
if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)) == BUILT_IN_TM_ABORT)
{
// If we have a ``_transaction_cancel [[outer]]'', there is only
// one abnormal edge: to the transaction marked OUTER.
// All compiler-generated instances of BUILT_IN_TM_ABORT have a
// constant argument, which we can examine here. Users invoking
// TM_ABORT directly get what they deserve.
tree arg = gimple_call_arg (stmt, 0);
if (TREE_CODE (arg) == INTEGER_CST
&& (TREE_INT_CST_LOW (arg) & AR_OUTERABORT) != 0
&& !decl_is_tm_clone (current_function_decl))
{
// Find the GTMA_IS_OUTER transaction.
for (struct tm_region *o = region; o; o = o->outer)
if (o->original_transaction_was_outer)
{
split_bb_make_tm_edge (stmt, o->restart_block,
gsi, &next_gsi);
break;
}
// Otherwise, the front-end should have semantically checked
// outer aborts, but in either case the target region is not
// within this function.
continue;
}
// Non-outer, TM aborts have an abnormal edge to the inner-most
// transaction, the one being aborted;
split_bb_make_tm_edge (stmt, region->restart_block, gsi, &next_gsi);
}
// All TM builtins have an abnormal edge to the outer-most transaction.
// We never restart inner transactions. For tm clones, we know a-priori
// that the outer-most transaction is outside the function.
if (decl_is_tm_clone (current_function_decl))
continue;
if (cfun->gimple_df->tm_restart == NULL)
cfun->gimple_df->tm_restart
= htab_create_ggc (31, struct_ptr_hash, struct_ptr_eq, ggc_free);
// All TM builtins have an abnormal edge to the outer-most transaction.
// We never restart inner transactions.
for (struct tm_region *o = region; o; o = o->outer)
if (!o->outer)
{
split_bb_make_tm_edge (stmt, o->restart_block, gsi, &next_gsi);
break;
}
// Delete any tail-call annotation that may have been added.
// The tail-call pass may have mis-identified the commit as being
// a candidate because we had not yet added this restart edge.
gimple_call_set_tail (stmt, false);
}
}
/* Entry point to the final expansion of transactional nodes. */
static unsigned int
execute_tm_edges (void)
{
vec bb_regions
= get_bb_regions_instrumented (/*traverse_clones=*/false,
/*include_uninstrumented_p=*/true);
struct tm_region *r;
unsigned i;
FOR_EACH_VEC_ELT (bb_regions, i, r)
if (r != NULL)
expand_block_edges (r, BASIC_BLOCK (i));
bb_regions.release ();
/* We've got to release the dominance info now, to indicate that it
must be rebuilt completely. Otherwise we'll crash trying to update
the SSA web in the TODO section following this pass. */
free_dominance_info (CDI_DOMINATORS);
bitmap_obstack_release (&tm_obstack);
all_tm_regions = NULL;
return 0;
}
struct gimple_opt_pass pass_tm_edges =
{
{
GIMPLE_PASS,
"tmedge", /* name */
OPTGROUP_NONE, /* optinfo_flags */
NULL, /* gate */
execute_tm_edges, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TRANS_MEM, /* tv_id */
PROP_ssa | PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_update_ssa
| TODO_verify_ssa, /* todo_flags_finish */
}
};
/* Helper function for expand_regions. Expand REGION and recurse to
the inner region. Call CALLBACK on each region. CALLBACK returns
NULL to continue the traversal, otherwise a non-null value which
this function will return as well. TRAVERSE_CLONES is true if we
should traverse transactional clones. */
static void *
expand_regions_1 (struct tm_region *region,
void *(*callback)(struct tm_region *, void *),
void *data,
bool traverse_clones)
{
void *retval = NULL;
if (region->exit_blocks
|| (traverse_clones && decl_is_tm_clone (current_function_decl)))
{
retval = callback (region, data);
if (retval)
return retval;
}
if (region->inner)
{
retval = expand_regions (region->inner, callback, data, traverse_clones);
if (retval)
return retval;
}
return retval;
}
/* Traverse the regions enclosed and including REGION. Execute
CALLBACK for each region, passing DATA. CALLBACK returns NULL to
continue the traversal, otherwise a non-null value which this
function will return as well. TRAVERSE_CLONES is true if we should
traverse transactional clones. */
static void *
expand_regions (struct tm_region *region,
void *(*callback)(struct tm_region *, void *),
void *data,
bool traverse_clones)
{
void *retval = NULL;
while (region)
{
retval = expand_regions_1 (region, callback, data, traverse_clones);
if (retval)
return retval;
region = region->next;
}
return retval;
}
/* A unique TM memory operation. */
typedef struct tm_memop
{
/* Unique ID that all memory operations to the same location have. */
unsigned int value_id;
/* Address of load/store. */
tree addr;
} *tm_memop_t;
/* TM memory operation hashtable helpers. */
struct tm_memop_hasher : typed_free_remove
{
typedef tm_memop value_type;
typedef tm_memop compare_type;
static inline hashval_t hash (const value_type *);
static inline bool equal (const value_type *, const compare_type *);
};
/* Htab support. Return a hash value for a `tm_memop'. */
inline hashval_t
tm_memop_hasher::hash (const value_type *mem)
{
tree addr = mem->addr;
/* We drill down to the SSA_NAME/DECL for the hash, but equality is
actually done with operand_equal_p (see tm_memop_eq). */
if (TREE_CODE (addr) == ADDR_EXPR)
addr = TREE_OPERAND (addr, 0);
return iterative_hash_expr (addr, 0);
}
/* Htab support. Return true if two tm_memop's are the same. */
inline bool
tm_memop_hasher::equal (const value_type *mem1, const compare_type *mem2)
{
return operand_equal_p (mem1->addr, mem2->addr, 0);
}
/* Sets for solving data flow equations in the memory optimization pass. */
struct tm_memopt_bitmaps
{
/* Stores available to this BB upon entry. Basically, stores that
dominate this BB. */
bitmap store_avail_in;
/* Stores available at the end of this BB. */
bitmap store_avail_out;
bitmap store_antic_in;
bitmap store_antic_out;
/* Reads available to this BB upon entry. Basically, reads that
dominate this BB. */
bitmap read_avail_in;
/* Reads available at the end of this BB. */
bitmap read_avail_out;
/* Reads performed in this BB. */
bitmap read_local;
/* Writes performed in this BB. */
bitmap store_local;
/* Temporary storage for pass. */
/* Is the current BB in the worklist? */
bool avail_in_worklist_p;
/* Have we visited this BB? */
bool visited_p;
};
static bitmap_obstack tm_memopt_obstack;
/* Unique counter for TM loads and stores. Loads and stores of the
same address get the same ID. */
static unsigned int tm_memopt_value_id;
static hash_table tm_memopt_value_numbers;
#define STORE_AVAIL_IN(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->store_avail_in
#define STORE_AVAIL_OUT(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->store_avail_out
#define STORE_ANTIC_IN(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->store_antic_in
#define STORE_ANTIC_OUT(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->store_antic_out
#define READ_AVAIL_IN(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->read_avail_in
#define READ_AVAIL_OUT(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->read_avail_out
#define READ_LOCAL(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->read_local
#define STORE_LOCAL(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->store_local
#define AVAIL_IN_WORKLIST_P(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->avail_in_worklist_p
#define BB_VISITED_P(BB) \
((struct tm_memopt_bitmaps *) ((BB)->aux))->visited_p
/* Given a TM load/store in STMT, return the value number for the address
it accesses. */
static unsigned int
tm_memopt_value_number (gimple stmt, enum insert_option op)
{
struct tm_memop tmpmem, *mem;
tm_memop **slot;
gcc_assert (is_tm_load (stmt) || is_tm_store (stmt));
tmpmem.addr = gimple_call_arg (stmt, 0);
slot = tm_memopt_value_numbers.find_slot (&tmpmem, op);
if (*slot)
mem = *slot;
else if (op == INSERT)
{
mem = XNEW (struct tm_memop);
*slot = mem;
mem->value_id = tm_memopt_value_id++;
mem->addr = tmpmem.addr;
}
else
gcc_unreachable ();
return mem->value_id;
}
/* Accumulate TM memory operations in BB into STORE_LOCAL and READ_LOCAL. */
static void
tm_memopt_accumulate_memops (basic_block bb)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
bitmap bits;
unsigned int loc;
if (is_tm_store (stmt))
bits = STORE_LOCAL (bb);
else if (is_tm_load (stmt))
bits = READ_LOCAL (bb);
else
continue;
loc = tm_memopt_value_number (stmt, INSERT);
bitmap_set_bit (bits, loc);
if (dump_file)
{
fprintf (dump_file, "TM memopt (%s): value num=%d, BB=%d, addr=",
is_tm_load (stmt) ? "LOAD" : "STORE", loc,
gimple_bb (stmt)->index);
print_generic_expr (dump_file, gimple_call_arg (stmt, 0), 0);
fprintf (dump_file, "\n");
}
}
}
/* Prettily dump one of the memopt sets. BITS is the bitmap to dump. */
static void
dump_tm_memopt_set (const char *set_name, bitmap bits)
{
unsigned i;
bitmap_iterator bi;
const char *comma = "";
fprintf (dump_file, "TM memopt: %s: [", set_name);
EXECUTE_IF_SET_IN_BITMAP (bits, 0, i, bi)
{
hash_table ::iterator hi;
struct tm_memop *mem = NULL;
/* Yeah, yeah, yeah. Whatever. This is just for debugging. */
FOR_EACH_HASH_TABLE_ELEMENT (tm_memopt_value_numbers, mem, tm_memop_t, hi)
if (mem->value_id == i)
break;
gcc_assert (mem->value_id == i);
fprintf (dump_file, "%s", comma);
comma = ", ";
print_generic_expr (dump_file, mem->addr, 0);
}
fprintf (dump_file, "]\n");
}
/* Prettily dump all of the memopt sets in BLOCKS. */
static void
dump_tm_memopt_sets (vec blocks)
{
size_t i;
basic_block bb;
for (i = 0; blocks.iterate (i, &bb); ++i)
{
fprintf (dump_file, "------------BB %d---------\n", bb->index);
dump_tm_memopt_set ("STORE_LOCAL", STORE_LOCAL (bb));
dump_tm_memopt_set ("READ_LOCAL", READ_LOCAL (bb));
dump_tm_memopt_set ("STORE_AVAIL_IN", STORE_AVAIL_IN (bb));
dump_tm_memopt_set ("STORE_AVAIL_OUT", STORE_AVAIL_OUT (bb));
dump_tm_memopt_set ("READ_AVAIL_IN", READ_AVAIL_IN (bb));
dump_tm_memopt_set ("READ_AVAIL_OUT", READ_AVAIL_OUT (bb));
}
}
/* Compute {STORE,READ}_AVAIL_IN for the basic block BB. */
static void
tm_memopt_compute_avin (basic_block bb)
{
edge e;
unsigned ix;
/* Seed with the AVOUT of any predecessor. */
for (ix = 0; ix < EDGE_COUNT (bb->preds); ix++)
{
e = EDGE_PRED (bb, ix);
/* Make sure we have already visited this BB, and is thus
initialized.
If e->src->aux is NULL, this predecessor is actually on an
enclosing transaction. We only care about the current
transaction, so ignore it. */
if (e->src->aux && BB_VISITED_P (e->src))
{
bitmap_copy (STORE_AVAIL_IN (bb), STORE_AVAIL_OUT (e->src));
bitmap_copy (READ_AVAIL_IN (bb), READ_AVAIL_OUT (e->src));
break;
}
}
for (; ix < EDGE_COUNT (bb->preds); ix++)
{
e = EDGE_PRED (bb, ix);
if (e->src->aux && BB_VISITED_P (e->src))
{
bitmap_and_into (STORE_AVAIL_IN (bb), STORE_AVAIL_OUT (e->src));
bitmap_and_into (READ_AVAIL_IN (bb), READ_AVAIL_OUT (e->src));
}
}
BB_VISITED_P (bb) = true;
}
/* Compute the STORE_ANTIC_IN for the basic block BB. */
static void
tm_memopt_compute_antin (basic_block bb)
{
edge e;
unsigned ix;
/* Seed with the ANTIC_OUT of any successor. */
for (ix = 0; ix < EDGE_COUNT (bb->succs); ix++)
{
e = EDGE_SUCC (bb, ix);
/* Make sure we have already visited this BB, and is thus
initialized. */
if (BB_VISITED_P (e->dest))
{
bitmap_copy (STORE_ANTIC_IN (bb), STORE_ANTIC_OUT (e->dest));
break;
}
}
for (; ix < EDGE_COUNT (bb->succs); ix++)
{
e = EDGE_SUCC (bb, ix);
if (BB_VISITED_P (e->dest))
bitmap_and_into (STORE_ANTIC_IN (bb), STORE_ANTIC_OUT (e->dest));
}
BB_VISITED_P (bb) = true;
}
/* Compute the AVAIL sets for every basic block in BLOCKS.
We compute {STORE,READ}_AVAIL_{OUT,IN} as follows:
AVAIL_OUT[bb] = union (AVAIL_IN[bb], LOCAL[bb])
AVAIL_IN[bb] = intersect (AVAIL_OUT[predecessors])
This is basically what we do in lcm's compute_available(), but here
we calculate two sets of sets (one for STOREs and one for READs),
and we work on a region instead of the entire CFG.
REGION is the TM region.
BLOCKS are the basic blocks in the region. */
static void
tm_memopt_compute_available (struct tm_region *region,
vec blocks)
{
edge e;
basic_block *worklist, *qin, *qout, *qend, bb;
unsigned int qlen, i;
edge_iterator ei;
bool changed;
/* Allocate a worklist array/queue. Entries are only added to the
list if they were not already on the list. So the size is
bounded by the number of basic blocks in the region. */
qlen = blocks.length () - 1;
qin = qout = worklist =
XNEWVEC (basic_block, qlen);
/* Put every block in the region on the worklist. */
for (i = 0; blocks.iterate (i, &bb); ++i)
{
/* Seed AVAIL_OUT with the LOCAL set. */
bitmap_ior_into (STORE_AVAIL_OUT (bb), STORE_LOCAL (bb));
bitmap_ior_into (READ_AVAIL_OUT (bb), READ_LOCAL (bb));
AVAIL_IN_WORKLIST_P (bb) = true;
/* No need to insert the entry block, since it has an AVIN of
null, and an AVOUT that has already been seeded in. */
if (bb != region->entry_block)
*qin++ = bb;
}
/* The entry block has been initialized with the local sets. */
BB_VISITED_P (region->entry_block) = true;
qin = worklist;
qend = &worklist[qlen];
/* Iterate until the worklist is empty. */
while (qlen)
{
/* Take the first entry off the worklist. */
bb = *qout++;
qlen--;
if (qout >= qend)
qout = worklist;
/* This block can be added to the worklist again if necessary. */
AVAIL_IN_WORKLIST_P (bb) = false;
tm_memopt_compute_avin (bb);
/* Note: We do not add the LOCAL sets here because we already
seeded the AVAIL_OUT sets with them. */
changed = bitmap_ior_into (STORE_AVAIL_OUT (bb), STORE_AVAIL_IN (bb));
changed |= bitmap_ior_into (READ_AVAIL_OUT (bb), READ_AVAIL_IN (bb));
if (changed
&& (region->exit_blocks == NULL
|| !bitmap_bit_p (region->exit_blocks, bb->index)))
/* If the out state of this block changed, then we need to add
its successors to the worklist if they are not already in. */
FOR_EACH_EDGE (e, ei, bb->succs)
if (!AVAIL_IN_WORKLIST_P (e->dest) && e->dest != EXIT_BLOCK_PTR)
{
*qin++ = e->dest;
AVAIL_IN_WORKLIST_P (e->dest) = true;
qlen++;
if (qin >= qend)
qin = worklist;
}
}
free (worklist);
if (dump_file)
dump_tm_memopt_sets (blocks);
}
/* Compute ANTIC sets for every basic block in BLOCKS.
We compute STORE_ANTIC_OUT as follows:
STORE_ANTIC_OUT[bb] = union(STORE_ANTIC_IN[bb], STORE_LOCAL[bb])
STORE_ANTIC_IN[bb] = intersect(STORE_ANTIC_OUT[successors])
REGION is the TM region.
BLOCKS are the basic blocks in the region. */
static void
tm_memopt_compute_antic (struct tm_region *region,
vec blocks)
{
edge e;
basic_block *worklist, *qin, *qout, *qend, bb;
unsigned int qlen;
int i;
edge_iterator ei;
/* Allocate a worklist array/queue. Entries are only added to the
list if they were not already on the list. So the size is
bounded by the number of basic blocks in the region. */
qin = qout = worklist = XNEWVEC (basic_block, blocks.length ());
for (qlen = 0, i = blocks.length () - 1; i >= 0; --i)
{
bb = blocks[i];
/* Seed ANTIC_OUT with the LOCAL set. */
bitmap_ior_into (STORE_ANTIC_OUT (bb), STORE_LOCAL (bb));
/* Put every block in the region on the worklist. */
AVAIL_IN_WORKLIST_P (bb) = true;
/* No need to insert exit blocks, since their ANTIC_IN is NULL,
and their ANTIC_OUT has already been seeded in. */
if (region->exit_blocks
&& !bitmap_bit_p (region->exit_blocks, bb->index))
{
qlen++;
*qin++ = bb;
}
}
/* The exit blocks have been initialized with the local sets. */
if (region->exit_blocks)
{
unsigned int i;
bitmap_iterator bi;
EXECUTE_IF_SET_IN_BITMAP (region->exit_blocks, 0, i, bi)
BB_VISITED_P (BASIC_BLOCK (i)) = true;
}
qin = worklist;
qend = &worklist[qlen];
/* Iterate until the worklist is empty. */
while (qlen)
{
/* Take the first entry off the worklist. */
bb = *qout++;
qlen--;
if (qout >= qend)
qout = worklist;
/* This block can be added to the worklist again if necessary. */
AVAIL_IN_WORKLIST_P (bb) = false;
tm_memopt_compute_antin (bb);
/* Note: We do not add the LOCAL sets here because we already
seeded the ANTIC_OUT sets with them. */
if (bitmap_ior_into (STORE_ANTIC_OUT (bb), STORE_ANTIC_IN (bb))
&& bb != region->entry_block)
/* If the out state of this block changed, then we need to add
its predecessors to the worklist if they are not already in. */
FOR_EACH_EDGE (e, ei, bb->preds)
if (!AVAIL_IN_WORKLIST_P (e->src))
{
*qin++ = e->src;
AVAIL_IN_WORKLIST_P (e->src) = true;
qlen++;
if (qin >= qend)
qin = worklist;
}
}
free (worklist);
if (dump_file)
dump_tm_memopt_sets (blocks);
}
/* Offsets of load variants from TM_LOAD. For example,
BUILT_IN_TM_LOAD_RAR* is an offset of 1 from BUILT_IN_TM_LOAD*.
See gtm-builtins.def. */
#define TRANSFORM_RAR 1
#define TRANSFORM_RAW 2
#define TRANSFORM_RFW 3
/* Offsets of store variants from TM_STORE. */
#define TRANSFORM_WAR 1
#define TRANSFORM_WAW 2
/* Inform about a load/store optimization. */
static void
dump_tm_memopt_transform (gimple stmt)
{
if (dump_file)
{
fprintf (dump_file, "TM memopt: transforming: ");
print_gimple_stmt (dump_file, stmt, 0, 0);
fprintf (dump_file, "\n");
}
}
/* Perform a read/write optimization. Replaces the TM builtin in STMT
by a builtin that is OFFSET entries down in the builtins table in
gtm-builtins.def. */
static void
tm_memopt_transform_stmt (unsigned int offset,
gimple stmt,
gimple_stmt_iterator *gsi)
{
tree fn = gimple_call_fn (stmt);
gcc_assert (TREE_CODE (fn) == ADDR_EXPR);
TREE_OPERAND (fn, 0)
= builtin_decl_explicit ((enum built_in_function)
(DECL_FUNCTION_CODE (TREE_OPERAND (fn, 0))
+ offset));
gimple_call_set_fn (stmt, fn);
gsi_replace (gsi, stmt, true);
dump_tm_memopt_transform (stmt);
}
/* Perform the actual TM memory optimization transformations in the
basic blocks in BLOCKS. */
static void
tm_memopt_transform_blocks (vec blocks)
{
size_t i;
basic_block bb;
gimple_stmt_iterator gsi;
for (i = 0; blocks.iterate (i, &bb); ++i)
{
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
bitmap read_avail = READ_AVAIL_IN (bb);
bitmap store_avail = STORE_AVAIL_IN (bb);
bitmap store_antic = STORE_ANTIC_OUT (bb);
unsigned int loc;
if (is_tm_simple_load (stmt))
{
loc = tm_memopt_value_number (stmt, NO_INSERT);
if (store_avail && bitmap_bit_p (store_avail, loc))
tm_memopt_transform_stmt (TRANSFORM_RAW, stmt, &gsi);
else if (store_antic && bitmap_bit_p (store_antic, loc))
{
tm_memopt_transform_stmt (TRANSFORM_RFW, stmt, &gsi);
bitmap_set_bit (store_avail, loc);
}
else if (read_avail && bitmap_bit_p (read_avail, loc))
tm_memopt_transform_stmt (TRANSFORM_RAR, stmt, &gsi);
else
bitmap_set_bit (read_avail, loc);
}
else if (is_tm_simple_store (stmt))
{
loc = tm_memopt_value_number (stmt, NO_INSERT);
if (store_avail && bitmap_bit_p (store_avail, loc))
tm_memopt_transform_stmt (TRANSFORM_WAW, stmt, &gsi);
else
{
if (read_avail && bitmap_bit_p (read_avail, loc))
tm_memopt_transform_stmt (TRANSFORM_WAR, stmt, &gsi);
bitmap_set_bit (store_avail, loc);
}
}
}
}
}
/* Return a new set of bitmaps for a BB. */
static struct tm_memopt_bitmaps *
tm_memopt_init_sets (void)
{
struct tm_memopt_bitmaps *b
= XOBNEW (&tm_memopt_obstack.obstack, struct tm_memopt_bitmaps);
b->store_avail_in = BITMAP_ALLOC (&tm_memopt_obstack);
b->store_avail_out = BITMAP_ALLOC (&tm_memopt_obstack);
b->store_antic_in = BITMAP_ALLOC (&tm_memopt_obstack);
b->store_antic_out = BITMAP_ALLOC (&tm_memopt_obstack);
b->store_avail_out = BITMAP_ALLOC (&tm_memopt_obstack);
b->read_avail_in = BITMAP_ALLOC (&tm_memopt_obstack);
b->read_avail_out = BITMAP_ALLOC (&tm_memopt_obstack);
b->read_local = BITMAP_ALLOC (&tm_memopt_obstack);
b->store_local = BITMAP_ALLOC (&tm_memopt_obstack);
return b;
}
/* Free sets computed for each BB. */
static void
tm_memopt_free_sets (vec blocks)
{
size_t i;
basic_block bb;
for (i = 0; blocks.iterate (i, &bb); ++i)
bb->aux = NULL;
}
/* Clear the visited bit for every basic block in BLOCKS. */
static void
tm_memopt_clear_visited (vec blocks)
{
size_t i;
basic_block bb;
for (i = 0; blocks.iterate (i, &bb); ++i)
BB_VISITED_P (bb) = false;
}
/* Replace TM load/stores with hints for the runtime. We handle
things like read-after-write, write-after-read, read-after-read,
read-for-write, etc. */
static unsigned int
execute_tm_memopt (void)
{
struct tm_region *region;
vec bbs;
tm_memopt_value_id = 0;
tm_memopt_value_numbers.create (10);
for (region = all_tm_regions; region; region = region->next)
{
/* All the TM stores/loads in the current region. */
size_t i;
basic_block bb;
bitmap_obstack_initialize (&tm_memopt_obstack);
/* Save all BBs for the current region. */
bbs = get_tm_region_blocks (region->entry_block,
region->exit_blocks,
region->irr_blocks,
NULL,
false);
/* Collect all the memory operations. */
for (i = 0; bbs.iterate (i, &bb); ++i)
{
bb->aux = tm_memopt_init_sets ();
tm_memopt_accumulate_memops (bb);
}
/* Solve data flow equations and transform each block accordingly. */
tm_memopt_clear_visited (bbs);
tm_memopt_compute_available (region, bbs);
tm_memopt_clear_visited (bbs);
tm_memopt_compute_antic (region, bbs);
tm_memopt_transform_blocks (bbs);
tm_memopt_free_sets (bbs);
bbs.release ();
bitmap_obstack_release (&tm_memopt_obstack);
tm_memopt_value_numbers.empty ();
}
tm_memopt_value_numbers.dispose ();
return 0;
}
static bool
gate_tm_memopt (void)
{
return flag_tm && optimize > 0;
}
struct gimple_opt_pass pass_tm_memopt =
{
{
GIMPLE_PASS,
"tmmemopt", /* name */
OPTGROUP_NONE, /* optinfo_flags */
gate_tm_memopt, /* gate */
execute_tm_memopt, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TRANS_MEM, /* tv_id */
PROP_ssa | PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
}
};
/* Interprocedual analysis for the creation of transactional clones.
The aim of this pass is to find which functions are referenced in
a non-irrevocable transaction context, and for those over which
we have control (or user directive), create a version of the
function which uses only the transactional interface to reference
protected memories. This analysis proceeds in several steps:
(1) Collect the set of all possible transactional clones:
(a) For all local public functions marked tm_callable, push
it onto the tm_callee queue.
(b) For all local functions, scan for calls in transaction blocks.
Push the caller and callee onto the tm_caller and tm_callee
queues. Count the number of callers for each callee.
(c) For each local function on the callee list, assume we will
create a transactional clone. Push *all* calls onto the
callee queues; count the number of clone callers separately
to the number of original callers.
(2) Propagate irrevocable status up the dominator tree:
(a) Any external function on the callee list that is not marked
tm_callable is irrevocable. Push all callers of such onto
a worklist.
(b) For each function on the worklist, mark each block that
contains an irrevocable call. Use the AND operator to
propagate that mark up the dominator tree.
(c) If we reach the entry block for a possible transactional
clone, then the transactional clone is irrevocable, and
we should not create the clone after all. Push all
callers onto the worklist.
(d) Place tm_irrevocable calls at the beginning of the relevant
blocks. Special case here is the entry block for the entire
transaction region; there we mark it GTMA_DOES_GO_IRREVOCABLE for
the library to begin the region in serial mode. Decrement
the call count for all callees in the irrevocable region.
(3) Create the transactional clones:
Any tm_callee that still has a non-zero call count is cloned.
*/
/* This structure is stored in the AUX field of each cgraph_node. */
struct tm_ipa_cg_data
{
/* The clone of the function that got created. */
struct cgraph_node *clone;
/* The tm regions in the normal function. */
struct tm_region *all_tm_regions;
/* The blocks of the normal/clone functions that contain irrevocable
calls, or blocks that are post-dominated by irrevocable calls. */
bitmap irrevocable_blocks_normal;
bitmap irrevocable_blocks_clone;
/* The blocks of the normal function that are involved in transactions. */
bitmap transaction_blocks_normal;
/* The number of callers to the transactional clone of this function
from normal and transactional clones respectively. */
unsigned tm_callers_normal;
unsigned tm_callers_clone;
/* True if all calls to this function's transactional clone
are irrevocable. Also automatically true if the function
has no transactional clone. */
bool is_irrevocable;
/* Flags indicating the presence of this function in various queues. */
bool in_callee_queue;
bool in_worklist;
/* Flags indicating the kind of scan desired while in the worklist. */
bool want_irr_scan_normal;
};
typedef vec cgraph_node_queue;
/* Return the ipa data associated with NODE, allocating zeroed memory
if necessary. TRAVERSE_ALIASES is true if we must traverse aliases
and set *NODE accordingly. */
static struct tm_ipa_cg_data *
get_cg_data (struct cgraph_node **node, bool traverse_aliases)
{
struct tm_ipa_cg_data *d;
if (traverse_aliases && (*node)->symbol.alias)
*node = cgraph_alias_target (*node);
d = (struct tm_ipa_cg_data *) (*node)->symbol.aux;
if (d == NULL)
{
d = (struct tm_ipa_cg_data *)
obstack_alloc (&tm_obstack.obstack, sizeof (*d));
(*node)->symbol.aux = (void *) d;
memset (d, 0, sizeof (*d));
}
return d;
}
/* Add NODE to the end of QUEUE, unless IN_QUEUE_P indicates that
it is already present. */
static void
maybe_push_queue (struct cgraph_node *node,
cgraph_node_queue *queue_p, bool *in_queue_p)
{
if (!*in_queue_p)
{
*in_queue_p = true;
queue_p->safe_push (node);
}
}
/* Duplicate the basic blocks in QUEUE for use in the uninstrumented
code path. QUEUE are the basic blocks inside the transaction
represented in REGION.
Later in split_code_paths() we will add the conditional to choose
between the two alternatives. */
static void
ipa_uninstrument_transaction (struct tm_region *region,
vec queue)
{
gimple transaction = region->transaction_stmt;
basic_block transaction_bb = gimple_bb (transaction);
int n = queue.length ();
basic_block *new_bbs = XNEWVEC (basic_block, n);
copy_bbs (queue.address (), n, new_bbs, NULL, 0, NULL, NULL, transaction_bb,
true);
edge e = make_edge (transaction_bb, new_bbs[0], EDGE_TM_UNINSTRUMENTED);
add_phi_args_after_copy (new_bbs, n, e);
// Now we will have a GIMPLE_ATOMIC with 3 possible edges out of it.
// a) EDGE_FALLTHRU into the transaction
// b) EDGE_TM_ABORT out of the transaction
// c) EDGE_TM_UNINSTRUMENTED into the uninstrumented blocks.
free (new_bbs);
}
/* A subroutine of ipa_tm_scan_calls_transaction and ipa_tm_scan_calls_clone.
Queue all callees within block BB. */
static void
ipa_tm_scan_calls_block (cgraph_node_queue *callees_p,
basic_block bb, bool for_clone)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
if (is_gimple_call (stmt) && !is_tm_pure_call (stmt))
{
tree fndecl = gimple_call_fndecl (stmt);
if (fndecl)
{
struct tm_ipa_cg_data *d;
unsigned *pcallers;
struct cgraph_node *node;
if (is_tm_ending_fndecl (fndecl))
continue;
if (find_tm_replacement_function (fndecl))
continue;
node = cgraph_get_node (fndecl);
gcc_assert (node != NULL);
d = get_cg_data (&node, true);
pcallers = (for_clone ? &d->tm_callers_clone
: &d->tm_callers_normal);
*pcallers += 1;
maybe_push_queue (node, callees_p, &d->in_callee_queue);
}
}
}
}
/* Scan all calls in NODE that are within a transaction region,
and push the resulting nodes into the callee queue. */
static void
ipa_tm_scan_calls_transaction (struct tm_ipa_cg_data *d,
cgraph_node_queue *callees_p)
{
struct tm_region *r;
d->transaction_blocks_normal = BITMAP_ALLOC (&tm_obstack);
d->all_tm_regions = all_tm_regions;
for (r = all_tm_regions; r; r = r->next)
{
vec bbs;
basic_block bb;
unsigned i;
bbs = get_tm_region_blocks (r->entry_block, r->exit_blocks, NULL,
d->transaction_blocks_normal, false);
// Generate the uninstrumented code path for this transaction.
ipa_uninstrument_transaction (r, bbs);
FOR_EACH_VEC_ELT (bbs, i, bb)
ipa_tm_scan_calls_block (callees_p, bb, false);
bbs.release ();
}
// ??? copy_bbs should maintain cgraph edges for the blocks as it is
// copying them, rather than forcing us to do this externally.
rebuild_cgraph_edges ();
// ??? In ipa_uninstrument_transaction we don't try to update dominators
// because copy_bbs doesn't return a VEC like iterate_fix_dominators expects.
// Instead, just release dominators here so update_ssa recomputes them.
free_dominance_info (CDI_DOMINATORS);
// When building the uninstrumented code path, copy_bbs will have invoked
// create_new_def_for starting an "ssa update context". There is only one
// instance of this context, so resolve ssa updates before moving on to
// the next function.
update_ssa (TODO_update_ssa);
}
/* Scan all calls in NODE as if this is the transactional clone,
and push the destinations into the callee queue. */
static void
ipa_tm_scan_calls_clone (struct cgraph_node *node,
cgraph_node_queue *callees_p)
{
struct function *fn = DECL_STRUCT_FUNCTION (node->symbol.decl);
basic_block bb;
FOR_EACH_BB_FN (bb, fn)
ipa_tm_scan_calls_block (callees_p, bb, true);
}
/* The function NODE has been detected to be irrevocable. Push all
of its callers onto WORKLIST for the purpose of re-scanning them. */
static void
ipa_tm_note_irrevocable (struct cgraph_node *node,
cgraph_node_queue *worklist_p)
{
struct tm_ipa_cg_data *d = get_cg_data (&node, true);
struct cgraph_edge *e;
d->is_irrevocable = true;
for (e = node->callers; e ; e = e->next_caller)
{
basic_block bb;
struct cgraph_node *caller;
/* Don't examine recursive calls. */
if (e->caller == node)
continue;
/* Even if we think we can go irrevocable, believe the user
above all. */
if (is_tm_safe_or_pure (e->caller->symbol.decl))
continue;
caller = e->caller;
d = get_cg_data (&caller, true);
/* Check if the callee is in a transactional region. If so,
schedule the function for normal re-scan as well. */
bb = gimple_bb (e->call_stmt);
gcc_assert (bb != NULL);
if (d->transaction_blocks_normal
&& bitmap_bit_p (d->transaction_blocks_normal, bb->index))
d->want_irr_scan_normal = true;
maybe_push_queue (caller, worklist_p, &d->in_worklist);
}
}
/* A subroutine of ipa_tm_scan_irr_blocks; return true iff any statement
within the block is irrevocable. */
static bool
ipa_tm_scan_irr_block (basic_block bb)
{
gimple_stmt_iterator gsi;
tree fn;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
switch (gimple_code (stmt))
{
case GIMPLE_ASSIGN:
if (gimple_assign_single_p (stmt))
{
tree lhs = gimple_assign_lhs (stmt);
tree rhs = gimple_assign_rhs1 (stmt);
if (volatile_var_p (lhs) || volatile_var_p (rhs))
return true;
}
break;
case GIMPLE_CALL:
{
tree lhs = gimple_call_lhs (stmt);
if (lhs && volatile_var_p (lhs))
return true;
if (is_tm_pure_call (stmt))
break;
fn = gimple_call_fn (stmt);
/* Functions with the attribute are by definition irrevocable. */
if (is_tm_irrevocable (fn))
return true;
/* For direct function calls, go ahead and check for replacement
functions, or transitive irrevocable functions. For indirect
functions, we'll ask the runtime. */
if (TREE_CODE (fn) == ADDR_EXPR)
{
struct tm_ipa_cg_data *d;
struct cgraph_node *node;
fn = TREE_OPERAND (fn, 0);
if (is_tm_ending_fndecl (fn))
break;
if (find_tm_replacement_function (fn))
break;
node = cgraph_get_node(fn);
d = get_cg_data (&node, true);
/* Return true if irrevocable, but above all, believe
the user. */
if (d->is_irrevocable
&& !is_tm_safe_or_pure (fn))
return true;
}
break;
}
case GIMPLE_ASM:
/* ??? The Approved Method of indicating that an inline
assembly statement is not relevant to the transaction
is to wrap it in a __tm_waiver block. This is not
yet implemented, so we can't check for it. */
if (is_tm_safe (current_function_decl))
{
tree t = build1 (NOP_EXPR, void_type_node, size_zero_node);
SET_EXPR_LOCATION (t, gimple_location (stmt));
error ("%Kasm not allowed in % function", t);
}
return true;
default:
break;
}
}
return false;
}
/* For each of the blocks seeded witin PQUEUE, walk the CFG looking
for new irrevocable blocks, marking them in NEW_IRR. Don't bother
scanning past OLD_IRR or EXIT_BLOCKS. */
static bool
ipa_tm_scan_irr_blocks (vec *pqueue, bitmap new_irr,
bitmap old_irr, bitmap exit_blocks)
{
bool any_new_irr = false;
edge e;
edge_iterator ei;
bitmap visited_blocks = BITMAP_ALLOC (NULL);
do
{
basic_block bb = pqueue->pop ();
/* Don't re-scan blocks we know already are irrevocable. */
if (old_irr && bitmap_bit_p (old_irr, bb->index))
continue;
if (ipa_tm_scan_irr_block (bb))
{
bitmap_set_bit (new_irr, bb->index);
any_new_irr = true;
}
else if (exit_blocks == NULL || !bitmap_bit_p (exit_blocks, bb->index))
{
FOR_EACH_EDGE (e, ei, bb->succs)
if (!bitmap_bit_p (visited_blocks, e->dest->index))
{
bitmap_set_bit (visited_blocks, e->dest->index);
pqueue->safe_push (e->dest);
}
}
}
while (!pqueue->is_empty ());
BITMAP_FREE (visited_blocks);
return any_new_irr;
}
/* Propagate the irrevocable property both up and down the dominator tree.
BB is the current block being scanned; EXIT_BLOCKS are the edges of the
TM regions; OLD_IRR are the results of a previous scan of the dominator
tree which has been fully propagated; NEW_IRR is the set of new blocks
which are gaining the irrevocable property during the current scan. */
static void
ipa_tm_propagate_irr (basic_block entry_block, bitmap new_irr,
bitmap old_irr, bitmap exit_blocks)
{
vec bbs;
bitmap all_region_blocks;
/* If this block is in the old set, no need to rescan. */
if (old_irr && bitmap_bit_p (old_irr, entry_block->index))
return;
all_region_blocks = BITMAP_ALLOC (&tm_obstack);
bbs = get_tm_region_blocks (entry_block, exit_blocks, NULL,
all_region_blocks, false);
do
{
basic_block bb = bbs.pop ();
bool this_irr = bitmap_bit_p (new_irr, bb->index);
bool all_son_irr = false;
edge_iterator ei;
edge e;
/* Propagate up. If my children are, I am too, but we must have
at least one child that is. */
if (!this_irr)
{
FOR_EACH_EDGE (e, ei, bb->succs)
{
if (!bitmap_bit_p (new_irr, e->dest->index))
{
all_son_irr = false;
break;
}
else
all_son_irr = true;
}
if (all_son_irr)
{
/* Add block to new_irr if it hasn't already been processed. */
if (!old_irr || !bitmap_bit_p (old_irr, bb->index))
{
bitmap_set_bit (new_irr, bb->index);
this_irr = true;
}
}
}
/* Propagate down to everyone we immediately dominate. */
if (this_irr)
{
basic_block son;
for (son = first_dom_son (CDI_DOMINATORS, bb);
son;
son = next_dom_son (CDI_DOMINATORS, son))
{
/* Make sure block is actually in a TM region, and it
isn't already in old_irr. */
if ((!old_irr || !bitmap_bit_p (old_irr, son->index))
&& bitmap_bit_p (all_region_blocks, son->index))
bitmap_set_bit (new_irr, son->index);
}
}
}
while (!bbs.is_empty ());
BITMAP_FREE (all_region_blocks);
bbs.release ();
}
static void
ipa_tm_decrement_clone_counts (basic_block bb, bool for_clone)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
if (is_gimple_call (stmt) && !is_tm_pure_call (stmt))
{
tree fndecl = gimple_call_fndecl (stmt);
if (fndecl)
{
struct tm_ipa_cg_data *d;
unsigned *pcallers;
struct cgraph_node *tnode;
if (is_tm_ending_fndecl (fndecl))
continue;
if (find_tm_replacement_function (fndecl))
continue;
tnode = cgraph_get_node (fndecl);
d = get_cg_data (&tnode, true);
pcallers = (for_clone ? &d->tm_callers_clone
: &d->tm_callers_normal);
gcc_assert (*pcallers > 0);
*pcallers -= 1;
}
}
}
}
/* (Re-)Scan the transaction blocks in NODE for calls to irrevocable functions,
as well as other irrevocable actions such as inline assembly. Mark all
such blocks as irrevocable and decrement the number of calls to
transactional clones. Return true if, for the transactional clone, the
entire function is irrevocable. */
static bool
ipa_tm_scan_irr_function (struct cgraph_node *node, bool for_clone)
{
struct tm_ipa_cg_data *d;
bitmap new_irr, old_irr;
vec queue;
bool ret = false;
/* Builtin operators (operator new, and such). */
if (DECL_STRUCT_FUNCTION (node->symbol.decl) == NULL
|| DECL_STRUCT_FUNCTION (node->symbol.decl)->cfg == NULL)
return false;
push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
calculate_dominance_info (CDI_DOMINATORS);
d = get_cg_data (&node, true);
queue.create (10);
new_irr = BITMAP_ALLOC (&tm_obstack);
/* Scan each tm region, propagating irrevocable status through the tree. */
if (for_clone)
{
old_irr = d->irrevocable_blocks_clone;
queue.quick_push (single_succ (ENTRY_BLOCK_PTR));
if (ipa_tm_scan_irr_blocks (&queue, new_irr, old_irr, NULL))
{
ipa_tm_propagate_irr (single_succ (ENTRY_BLOCK_PTR), new_irr,
old_irr, NULL);
ret = bitmap_bit_p (new_irr, single_succ (ENTRY_BLOCK_PTR)->index);
}
}
else
{
struct tm_region *region;
old_irr = d->irrevocable_blocks_normal;
for (region = d->all_tm_regions; region; region = region->next)
{
queue.quick_push (region->entry_block);
if (ipa_tm_scan_irr_blocks (&queue, new_irr, old_irr,
region->exit_blocks))
ipa_tm_propagate_irr (region->entry_block, new_irr, old_irr,
region->exit_blocks);
}
}
/* If we found any new irrevocable blocks, reduce the call count for
transactional clones within the irrevocable blocks. Save the new
set of irrevocable blocks for next time. */
if (!bitmap_empty_p (new_irr))
{
bitmap_iterator bmi;
unsigned i;
EXECUTE_IF_SET_IN_BITMAP (new_irr, 0, i, bmi)
ipa_tm_decrement_clone_counts (BASIC_BLOCK (i), for_clone);
if (old_irr)
{
bitmap_ior_into (old_irr, new_irr);
BITMAP_FREE (new_irr);
}
else if (for_clone)
d->irrevocable_blocks_clone = new_irr;
else
d->irrevocable_blocks_normal = new_irr;
if (dump_file && new_irr)
{
const char *dname;
bitmap_iterator bmi;
unsigned i;
dname = lang_hooks.decl_printable_name (current_function_decl, 2);
EXECUTE_IF_SET_IN_BITMAP (new_irr, 0, i, bmi)
fprintf (dump_file, "%s: bb %d goes irrevocable\n", dname, i);
}
}
else
BITMAP_FREE (new_irr);
queue.release ();
pop_cfun ();
return ret;
}
/* Return true if, for the transactional clone of NODE, any call
may enter irrevocable mode. */
static bool
ipa_tm_mayenterirr_function (struct cgraph_node *node)
{
struct tm_ipa_cg_data *d;
tree decl;
unsigned flags;
d = get_cg_data (&node, true);
decl = node->symbol.decl;
flags = flags_from_decl_or_type (decl);
/* Handle some TM builtins. Ordinarily these aren't actually generated
at this point, but handling these functions when written in by the
user makes it easier to build unit tests. */
if (flags & ECF_TM_BUILTIN)
return false;
/* Filter out all functions that are marked. */
if (flags & ECF_TM_PURE)
return false;
if (is_tm_safe (decl))
return false;
if (is_tm_irrevocable (decl))
return true;
if (is_tm_callable (decl))
return true;
if (find_tm_replacement_function (decl))
return true;
/* If we aren't seeing the final version of the function we don't
know what it will contain at runtime. */
if (cgraph_function_body_availability (node) < AVAIL_AVAILABLE)
return true;
/* If the function must go irrevocable, then of course true. */
if (d->is_irrevocable)
return true;
/* If there are any blocks marked irrevocable, then the function
as a whole may enter irrevocable. */
if (d->irrevocable_blocks_clone)
return true;
/* We may have previously marked this function as tm_may_enter_irr;
see pass_diagnose_tm_blocks. */
if (node->local.tm_may_enter_irr)
return true;
/* Recurse on the main body for aliases. In general, this will
result in one of the bits above being set so that we will not
have to recurse next time. */
if (node->symbol.alias)
return ipa_tm_mayenterirr_function (cgraph_get_node (node->thunk.alias));
/* What remains is unmarked local functions without items that force
the function to go irrevocable. */
return false;
}
/* Diagnose calls from transaction_safe functions to unmarked
functions that are determined to not be safe. */
static void
ipa_tm_diagnose_tm_safe (struct cgraph_node *node)
{
struct cgraph_edge *e;
for (e = node->callees; e ; e = e->next_callee)
if (!is_tm_callable (e->callee->symbol.decl)
&& e->callee->local.tm_may_enter_irr)
error_at (gimple_location (e->call_stmt),
"unsafe function call %qD within "
"% function", e->callee->symbol.decl);
}
/* Diagnose call from atomic transactions to unmarked functions
that are determined to not be safe. */
static void
ipa_tm_diagnose_transaction (struct cgraph_node *node,
struct tm_region *all_tm_regions)
{
struct tm_region *r;
for (r = all_tm_regions; r ; r = r->next)
if (gimple_transaction_subcode (r->transaction_stmt) & GTMA_IS_RELAXED)
{
/* Atomic transactions can be nested inside relaxed. */
if (r->inner)
ipa_tm_diagnose_transaction (node, r->inner);
}
else
{
vec bbs;
gimple_stmt_iterator gsi;
basic_block bb;
size_t i;
bbs = get_tm_region_blocks (r->entry_block, r->exit_blocks,
r->irr_blocks, NULL, false);
for (i = 0; bbs.iterate (i, &bb); ++i)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
tree fndecl;
if (gimple_code (stmt) == GIMPLE_ASM)
{
error_at (gimple_location (stmt),
"asm not allowed in atomic transaction");
continue;
}
if (!is_gimple_call (stmt))
continue;
fndecl = gimple_call_fndecl (stmt);
/* Indirect function calls have been diagnosed already. */
if (!fndecl)
continue;
/* Stop at the end of the transaction. */
if (is_tm_ending_fndecl (fndecl))
{
if (bitmap_bit_p (r->exit_blocks, bb->index))
break;
continue;
}
/* Marked functions have been diagnosed already. */
if (is_tm_pure_call (stmt))
continue;
if (is_tm_callable (fndecl))
continue;
if (cgraph_local_info (fndecl)->tm_may_enter_irr)
error_at (gimple_location (stmt),
"unsafe function call %qD within "
"atomic transaction", fndecl);
}
bbs.release ();
}
}
/* Return a transactional mangled name for the DECL_ASSEMBLER_NAME in
OLD_DECL. The returned value is a freshly malloced pointer that
should be freed by the caller. */
static tree
tm_mangle (tree old_asm_id)
{
const char *old_asm_name;
char *tm_name;
void *alloc = NULL;
struct demangle_component *dc;
tree new_asm_id;
/* Determine if the symbol is already a valid C++ mangled name. Do this
even for C, which might be interfacing with C++ code via appropriately
ugly identifiers. */
/* ??? We could probably do just as well checking for "_Z" and be done. */
old_asm_name = IDENTIFIER_POINTER (old_asm_id);
dc = cplus_demangle_v3_components (old_asm_name, DMGL_NO_OPTS, &alloc);
if (dc == NULL)
{
char length[8];
do_unencoded:
sprintf (length, "%u", IDENTIFIER_LENGTH (old_asm_id));
tm_name = concat ("_ZGTt", length, old_asm_name, NULL);
}
else
{
old_asm_name += 2; /* Skip _Z */
switch (dc->type)
{
case DEMANGLE_COMPONENT_TRANSACTION_CLONE:
case DEMANGLE_COMPONENT_NONTRANSACTION_CLONE:
/* Don't play silly games, you! */
goto do_unencoded;
case DEMANGLE_COMPONENT_HIDDEN_ALIAS:
/* I'd really like to know if we can ever be passed one of
these from the C++ front end. The Logical Thing would
seem that hidden-alias should be outer-most, so that we
get hidden-alias of a transaction-clone and not vice-versa. */
old_asm_name += 2;
break;
default:
break;
}
tm_name = concat ("_ZGTt", old_asm_name, NULL);
}
free (alloc);
new_asm_id = get_identifier (tm_name);
free (tm_name);
return new_asm_id;
}
static inline void
ipa_tm_mark_force_output_node (struct cgraph_node *node)
{
cgraph_mark_force_output_node (node);
node->symbol.analyzed = true;
}
static inline void
ipa_tm_mark_forced_by_abi_node (struct cgraph_node *node)
{
node->symbol.forced_by_abi = true;
node->symbol.analyzed = true;
}
/* Callback data for ipa_tm_create_version_alias. */
struct create_version_alias_info
{
struct cgraph_node *old_node;
tree new_decl;
};
/* A subroutine of ipa_tm_create_version, called via
cgraph_for_node_and_aliases. Create new tm clones for each of
the existing aliases. */
static bool
ipa_tm_create_version_alias (struct cgraph_node *node, void *data)
{
struct create_version_alias_info *info
= (struct create_version_alias_info *)data;
tree old_decl, new_decl, tm_name;
struct cgraph_node *new_node;
if (!node->symbol.cpp_implicit_alias)
return false;
old_decl = node->symbol.decl;
tm_name = tm_mangle (DECL_ASSEMBLER_NAME (old_decl));
new_decl = build_decl (DECL_SOURCE_LOCATION (old_decl),
TREE_CODE (old_decl), tm_name,
TREE_TYPE (old_decl));
SET_DECL_ASSEMBLER_NAME (new_decl, tm_name);
SET_DECL_RTL (new_decl, NULL);
/* Based loosely on C++'s make_alias_for(). */
TREE_PUBLIC (new_decl) = TREE_PUBLIC (old_decl);
DECL_CONTEXT (new_decl) = DECL_CONTEXT (old_decl);
DECL_LANG_SPECIFIC (new_decl) = DECL_LANG_SPECIFIC (old_decl);
TREE_READONLY (new_decl) = TREE_READONLY (old_decl);
DECL_EXTERNAL (new_decl) = 0;
DECL_ARTIFICIAL (new_decl) = 1;
TREE_ADDRESSABLE (new_decl) = 1;
TREE_USED (new_decl) = 1;
TREE_SYMBOL_REFERENCED (tm_name) = 1;
/* Perform the same remapping to the comdat group. */
if (DECL_ONE_ONLY (new_decl))
DECL_COMDAT_GROUP (new_decl) = tm_mangle (DECL_COMDAT_GROUP (old_decl));
new_node = cgraph_same_body_alias (NULL, new_decl, info->new_decl);
new_node->tm_clone = true;
new_node->symbol.externally_visible = info->old_node->symbol.externally_visible;
/* ?? Do not traverse aliases here. */
get_cg_data (&node, false)->clone = new_node;
record_tm_clone_pair (old_decl, new_decl);
if (info->old_node->symbol.force_output
|| ipa_ref_list_first_referring (&info->old_node->symbol.ref_list))
ipa_tm_mark_force_output_node (new_node);
if (info->old_node->symbol.forced_by_abi)
ipa_tm_mark_forced_by_abi_node (new_node);
return false;
}
/* Create a copy of the function (possibly declaration only) of OLD_NODE,
appropriate for the transactional clone. */
static void
ipa_tm_create_version (struct cgraph_node *old_node)
{
tree new_decl, old_decl, tm_name;
struct cgraph_node *new_node;
old_decl = old_node->symbol.decl;
new_decl = copy_node (old_decl);
/* DECL_ASSEMBLER_NAME needs to be set before we call
cgraph_copy_node_for_versioning below, because cgraph_node will
fill the assembler_name_hash. */
tm_name = tm_mangle (DECL_ASSEMBLER_NAME (old_decl));
SET_DECL_ASSEMBLER_NAME (new_decl, tm_name);
SET_DECL_RTL (new_decl, NULL);
TREE_SYMBOL_REFERENCED (tm_name) = 1;
/* Perform the same remapping to the comdat group. */
if (DECL_ONE_ONLY (new_decl))
DECL_COMDAT_GROUP (new_decl) = tm_mangle (DECL_COMDAT_GROUP (old_decl));
new_node = cgraph_copy_node_for_versioning (old_node, new_decl, vNULL, NULL);
new_node->symbol.externally_visible = old_node->symbol.externally_visible;
new_node->lowered = true;
new_node->tm_clone = 1;
get_cg_data (&old_node, true)->clone = new_node;
if (cgraph_function_body_availability (old_node) >= AVAIL_OVERWRITABLE)
{
/* Remap extern inline to static inline. */
/* ??? Is it worth trying to use make_decl_one_only? */
if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl))
{
DECL_EXTERNAL (new_decl) = 0;
TREE_PUBLIC (new_decl) = 0;
DECL_WEAK (new_decl) = 0;
}
tree_function_versioning (old_decl, new_decl,
NULL, false, NULL,
false, NULL, NULL);
}
record_tm_clone_pair (old_decl, new_decl);
cgraph_call_function_insertion_hooks (new_node);
if (old_node->symbol.force_output
|| ipa_ref_list_first_referring (&old_node->symbol.ref_list))
ipa_tm_mark_force_output_node (new_node);
if (old_node->symbol.forced_by_abi)
ipa_tm_mark_forced_by_abi_node (new_node);
/* Do the same thing, but for any aliases of the original node. */
{
struct create_version_alias_info data;
data.old_node = old_node;
data.new_decl = new_decl;
cgraph_for_node_and_aliases (old_node, ipa_tm_create_version_alias,
&data, true);
}
}
/* Construct a call to TM_IRREVOCABLE and insert it at the beginning of BB. */
static void
ipa_tm_insert_irr_call (struct cgraph_node *node, struct tm_region *region,
basic_block bb)
{
gimple_stmt_iterator gsi;
gimple g;
transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
g = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_IRREVOCABLE),
1, build_int_cst (NULL_TREE, MODE_SERIALIRREVOCABLE));
split_block_after_labels (bb);
gsi = gsi_after_labels (bb);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
cgraph_create_edge (node,
cgraph_get_create_node
(builtin_decl_explicit (BUILT_IN_TM_IRREVOCABLE)),
g, 0,
compute_call_stmt_bb_frequency (node->symbol.decl,
gimple_bb (g)));
}
/* Construct a call to TM_GETTMCLONE and insert it before GSI. */
static bool
ipa_tm_insert_gettmclone_call (struct cgraph_node *node,
struct tm_region *region,
gimple_stmt_iterator *gsi, gimple stmt)
{
tree gettm_fn, ret, old_fn, callfn;
gimple g, g2;
bool safe;
old_fn = gimple_call_fn (stmt);
if (TREE_CODE (old_fn) == ADDR_EXPR)
{
tree fndecl = TREE_OPERAND (old_fn, 0);
tree clone = get_tm_clone_pair (fndecl);
/* By transforming the call into a TM_GETTMCLONE, we are
technically taking the address of the original function and
its clone. Explain this so inlining will know this function
is needed. */
cgraph_mark_address_taken_node (cgraph_get_node (fndecl));
if (clone)
cgraph_mark_address_taken_node (cgraph_get_node (clone));
}
safe = is_tm_safe (TREE_TYPE (old_fn));
gettm_fn = builtin_decl_explicit (safe ? BUILT_IN_TM_GETTMCLONE_SAFE
: BUILT_IN_TM_GETTMCLONE_IRR);
ret = create_tmp_var (ptr_type_node, NULL);
if (!safe)
transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
/* Discard OBJ_TYPE_REF, since we weren't able to fold it. */
if (TREE_CODE (old_fn) == OBJ_TYPE_REF)
old_fn = OBJ_TYPE_REF_EXPR (old_fn);
g = gimple_build_call (gettm_fn, 1, old_fn);
ret = make_ssa_name (ret, g);
gimple_call_set_lhs (g, ret);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
cgraph_create_edge (node, cgraph_get_create_node (gettm_fn), g, 0,
compute_call_stmt_bb_frequency (node->symbol.decl,
gimple_bb(g)));
/* Cast return value from tm_gettmclone* into appropriate function
pointer. */
callfn = create_tmp_var (TREE_TYPE (old_fn), NULL);
g2 = gimple_build_assign (callfn,
fold_build1 (NOP_EXPR, TREE_TYPE (callfn), ret));
callfn = make_ssa_name (callfn, g2);
gimple_assign_set_lhs (g2, callfn);
gsi_insert_before (gsi, g2, GSI_SAME_STMT);
/* ??? This is a hack to preserve the NOTHROW bit on the call,
which we would have derived from the decl. Failure to save
this bit means we might have to split the basic block. */
if (gimple_call_nothrow_p (stmt))
gimple_call_set_nothrow (stmt, true);
gimple_call_set_fn (stmt, callfn);
/* Discarding OBJ_TYPE_REF above may produce incompatible LHS and RHS
for a call statement. Fix it. */
{
tree lhs = gimple_call_lhs (stmt);
tree rettype = TREE_TYPE (gimple_call_fntype (stmt));
if (lhs
&& !useless_type_conversion_p (TREE_TYPE (lhs), rettype))
{
tree temp;
temp = create_tmp_reg (rettype, 0);
gimple_call_set_lhs (stmt, temp);
g2 = gimple_build_assign (lhs,
fold_build1 (VIEW_CONVERT_EXPR,
TREE_TYPE (lhs), temp));
gsi_insert_after (gsi, g2, GSI_SAME_STMT);
}
}
update_stmt (stmt);
return true;
}
/* Helper function for ipa_tm_transform_calls*. Given a call
statement in GSI which resides inside transaction REGION, redirect
the call to either its wrapper function, or its clone. */
static void
ipa_tm_transform_calls_redirect (struct cgraph_node *node,
struct tm_region *region,
gimple_stmt_iterator *gsi,
bool *need_ssa_rename_p)
{
gimple stmt = gsi_stmt (*gsi);
struct cgraph_node *new_node;
struct cgraph_edge *e = cgraph_edge (node, stmt);
tree fndecl = gimple_call_fndecl (stmt);
/* For indirect calls, pass the address through the runtime. */
if (fndecl == NULL)
{
*need_ssa_rename_p |=
ipa_tm_insert_gettmclone_call (node, region, gsi, stmt);
return;
}
/* Handle some TM builtins. Ordinarily these aren't actually generated
at this point, but handling these functions when written in by the
user makes it easier to build unit tests. */
if (flags_from_decl_or_type (fndecl) & ECF_TM_BUILTIN)
return;
/* Fixup recursive calls inside clones. */
/* ??? Why did cgraph_copy_node_for_versioning update the call edges
for recursion but not update the call statements themselves? */
if (e->caller == e->callee && decl_is_tm_clone (current_function_decl))
{
gimple_call_set_fndecl (stmt, current_function_decl);
return;
}
/* If there is a replacement, use it. */
fndecl = find_tm_replacement_function (fndecl);
if (fndecl)
{
new_node = cgraph_get_create_node (fndecl);
/* ??? Mark all transaction_wrap functions tm_may_enter_irr.
We can't do this earlier in record_tm_replacement because
cgraph_remove_unreachable_nodes is called before we inject
references to the node. Further, we can't do this in some
nice central place in ipa_tm_execute because we don't have
the exact list of wrapper functions that would be used.
Marking more wrappers than necessary results in the creation
of unnecessary cgraph_nodes, which can cause some of the
other IPA passes to crash.
We do need to mark these nodes so that we get the proper
result in expand_call_tm. */
/* ??? This seems broken. How is it that we're marking the
CALLEE as may_enter_irr? Surely we should be marking the
CALLER. Also note that find_tm_replacement_function also
contains mappings into the TM runtime, e.g. memcpy. These
we know won't go irrevocable. */
new_node->local.tm_may_enter_irr = 1;
}
else
{
struct tm_ipa_cg_data *d;
struct cgraph_node *tnode = e->callee;
d = get_cg_data (&tnode, true);
new_node = d->clone;
/* As we've already skipped pure calls and appropriate builtins,
and we've already marked irrevocable blocks, if we can't come
up with a static replacement, then ask the runtime. */
if (new_node == NULL)
{
*need_ssa_rename_p |=
ipa_tm_insert_gettmclone_call (node, region, gsi, stmt);
return;
}
fndecl = new_node->symbol.decl;
}
cgraph_redirect_edge_callee (e, new_node);
gimple_call_set_fndecl (stmt, fndecl);
}
/* Helper function for ipa_tm_transform_calls. For a given BB,
install calls to tm_irrevocable when IRR_BLOCKS are reached,
redirect other calls to the generated transactional clone. */
static bool
ipa_tm_transform_calls_1 (struct cgraph_node *node, struct tm_region *region,
basic_block bb, bitmap irr_blocks)
{
gimple_stmt_iterator gsi;
bool need_ssa_rename = false;
if (irr_blocks && bitmap_bit_p (irr_blocks, bb->index))
{
ipa_tm_insert_irr_call (node, region, bb);
return true;
}
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
if (!is_gimple_call (stmt))
continue;
if (is_tm_pure_call (stmt))
continue;
/* Redirect edges to the appropriate replacement or clone. */
ipa_tm_transform_calls_redirect (node, region, &gsi, &need_ssa_rename);
}
return need_ssa_rename;
}
/* Walk the CFG for REGION, beginning at BB. Install calls to
tm_irrevocable when IRR_BLOCKS are reached, redirect other calls to
the generated transactional clone. */
static bool
ipa_tm_transform_calls (struct cgraph_node *node, struct tm_region *region,
basic_block bb, bitmap irr_blocks)
{
bool need_ssa_rename = false;
edge e;
edge_iterator ei;
vec queue = vNULL;
bitmap visited_blocks = BITMAP_ALLOC (NULL);
queue.safe_push (bb);
do
{
bb = queue.pop ();
need_ssa_rename |=
ipa_tm_transform_calls_1 (node, region, bb, irr_blocks);
if (irr_blocks && bitmap_bit_p (irr_blocks, bb->index))
continue;
if (region && bitmap_bit_p (region->exit_blocks, bb->index))
continue;
FOR_EACH_EDGE (e, ei, bb->succs)
if (!bitmap_bit_p (visited_blocks, e->dest->index))
{
bitmap_set_bit (visited_blocks, e->dest->index);
queue.safe_push (e->dest);
}
}
while (!queue.is_empty ());
queue.release ();
BITMAP_FREE (visited_blocks);
return need_ssa_rename;
}
/* Transform the calls within the TM regions within NODE. */
static void
ipa_tm_transform_transaction (struct cgraph_node *node)
{
struct tm_ipa_cg_data *d;
struct tm_region *region;
bool need_ssa_rename = false;
d = get_cg_data (&node, true);
push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
calculate_dominance_info (CDI_DOMINATORS);
for (region = d->all_tm_regions; region; region = region->next)
{
/* If we're sure to go irrevocable, don't transform anything. */
if (d->irrevocable_blocks_normal
&& bitmap_bit_p (d->irrevocable_blocks_normal,
region->entry_block->index))
{
transaction_subcode_ior (region, GTMA_DOES_GO_IRREVOCABLE
| GTMA_MAY_ENTER_IRREVOCABLE
| GTMA_HAS_NO_INSTRUMENTATION);
continue;
}
need_ssa_rename |=
ipa_tm_transform_calls (node, region, region->entry_block,
d->irrevocable_blocks_normal);
}
if (need_ssa_rename)
update_ssa (TODO_update_ssa_only_virtuals);
pop_cfun ();
}
/* Transform the calls within the transactional clone of NODE. */
static void
ipa_tm_transform_clone (struct cgraph_node *node)
{
struct tm_ipa_cg_data *d;
bool need_ssa_rename;
d = get_cg_data (&node, true);
/* If this function makes no calls and has no irrevocable blocks,
then there's nothing to do. */
/* ??? Remove non-aborting top-level transactions. */
if (!node->callees && !node->indirect_calls && !d->irrevocable_blocks_clone)
return;
push_cfun (DECL_STRUCT_FUNCTION (d->clone->symbol.decl));
calculate_dominance_info (CDI_DOMINATORS);
need_ssa_rename =
ipa_tm_transform_calls (d->clone, NULL, single_succ (ENTRY_BLOCK_PTR),
d->irrevocable_blocks_clone);
if (need_ssa_rename)
update_ssa (TODO_update_ssa_only_virtuals);
pop_cfun ();
}
/* Main entry point for the transactional memory IPA pass. */
static unsigned int
ipa_tm_execute (void)
{
cgraph_node_queue tm_callees = cgraph_node_queue();
/* List of functions that will go irrevocable. */
cgraph_node_queue irr_worklist = cgraph_node_queue();
struct cgraph_node *node;
struct tm_ipa_cg_data *d;
enum availability a;
unsigned int i;
#ifdef ENABLE_CHECKING
verify_cgraph ();
#endif
bitmap_obstack_initialize (&tm_obstack);
initialize_original_copy_tables ();
/* For all local functions marked tm_callable, queue them. */
FOR_EACH_DEFINED_FUNCTION (node)
if (is_tm_callable (node->symbol.decl)
&& cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
{
d = get_cg_data (&node, true);
maybe_push_queue (node, &tm_callees, &d->in_callee_queue);
}
/* For all local reachable functions... */
FOR_EACH_DEFINED_FUNCTION (node)
if (node->lowered
&& cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
{
/* ... marked tm_pure, record that fact for the runtime by
indicating that the pure function is its own tm_callable.
No need to do this if the function's address can't be taken. */
if (is_tm_pure (node->symbol.decl))
{
if (!node->local.local)
record_tm_clone_pair (node->symbol.decl, node->symbol.decl);
continue;
}
push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
calculate_dominance_info (CDI_DOMINATORS);
tm_region_init (NULL);
if (all_tm_regions)
{
d = get_cg_data (&node, true);
/* Scan for calls that are in each transaction, and
generate the uninstrumented code path. */
ipa_tm_scan_calls_transaction (d, &tm_callees);
/* Put it in the worklist so we can scan the function
later (ipa_tm_scan_irr_function) and mark the
irrevocable blocks. */
maybe_push_queue (node, &irr_worklist, &d->in_worklist);
d->want_irr_scan_normal = true;
}
pop_cfun ();
}
/* For every local function on the callee list, scan as if we will be
creating a transactional clone, queueing all new functions we find
along the way. */
for (i = 0; i < tm_callees.length (); ++i)
{
node = tm_callees[i];
a = cgraph_function_body_availability (node);
d = get_cg_data (&node, true);
/* Put it in the worklist so we can scan the function later
(ipa_tm_scan_irr_function) and mark the irrevocable
blocks. */
maybe_push_queue (node, &irr_worklist, &d->in_worklist);
/* Some callees cannot be arbitrarily cloned. These will always be
irrevocable. Mark these now, so that we need not scan them. */
if (is_tm_irrevocable (node->symbol.decl))
ipa_tm_note_irrevocable (node, &irr_worklist);
else if (a <= AVAIL_NOT_AVAILABLE
&& !is_tm_safe_or_pure (node->symbol.decl))
ipa_tm_note_irrevocable (node, &irr_worklist);
else if (a >= AVAIL_OVERWRITABLE)
{
if (!tree_versionable_function_p (node->symbol.decl))
ipa_tm_note_irrevocable (node, &irr_worklist);
else if (!d->is_irrevocable)
{
/* If this is an alias, make sure its base is queued as well.
we need not scan the callees now, as the base will do. */
if (node->symbol.alias)
{
node = cgraph_get_node (node->thunk.alias);
d = get_cg_data (&node, true);
maybe_push_queue (node, &tm_callees, &d->in_callee_queue);
continue;
}
/* Add all nodes called by this function into
tm_callees as well. */
ipa_tm_scan_calls_clone (node, &tm_callees);
}
}
}
/* Iterate scans until no more work to be done. Prefer not to use
vec::pop because the worklist tends to follow a breadth-first
search of the callgraph, which should allow convergance with a
minimum number of scans. But we also don't want the worklist
array to grow without bound, so we shift the array up periodically. */
for (i = 0; i < irr_worklist.length (); ++i)
{
if (i > 256 && i == irr_worklist.length () / 8)
{
irr_worklist.block_remove (0, i);
i = 0;
}
node = irr_worklist[i];
d = get_cg_data (&node, true);
d->in_worklist = false;
if (d->want_irr_scan_normal)
{
d->want_irr_scan_normal = false;
ipa_tm_scan_irr_function (node, false);
}
if (d->in_callee_queue && ipa_tm_scan_irr_function (node, true))
ipa_tm_note_irrevocable (node, &irr_worklist);
}
/* For every function on the callee list, collect the tm_may_enter_irr
bit on the node. */
irr_worklist.truncate (0);
for (i = 0; i < tm_callees.length (); ++i)
{
node = tm_callees[i];
if (ipa_tm_mayenterirr_function (node))
{
d = get_cg_data (&node, true);
gcc_assert (d->in_worklist == false);
maybe_push_queue (node, &irr_worklist, &d->in_worklist);
}
}
/* Propagate the tm_may_enter_irr bit to callers until stable. */
for (i = 0; i < irr_worklist.length (); ++i)
{
struct cgraph_node *caller;
struct cgraph_edge *e;
struct ipa_ref *ref;
unsigned j;
if (i > 256 && i == irr_worklist.length () / 8)
{
irr_worklist.block_remove (0, i);
i = 0;
}
node = irr_worklist[i];
d = get_cg_data (&node, true);
d->in_worklist = false;
node->local.tm_may_enter_irr = true;
/* Propagate back to normal callers. */
for (e = node->callers; e ; e = e->next_caller)
{
caller = e->caller;
if (!is_tm_safe_or_pure (caller->symbol.decl)
&& !caller->local.tm_may_enter_irr)
{
d = get_cg_data (&caller, true);
maybe_push_queue (caller, &irr_worklist, &d->in_worklist);
}
}
/* Propagate back to referring aliases as well. */
for (j = 0; ipa_ref_list_referring_iterate (&node->symbol.ref_list, j, ref); j++)
{
caller = cgraph (ref->referring);
if (ref->use == IPA_REF_ALIAS
&& !caller->local.tm_may_enter_irr)
{
/* ?? Do not traverse aliases here. */
d = get_cg_data (&caller, false);
maybe_push_queue (caller, &irr_worklist, &d->in_worklist);
}
}
}
/* Now validate all tm_safe functions, and all atomic regions in
other functions. */
FOR_EACH_DEFINED_FUNCTION (node)
if (node->lowered
&& cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
{
d = get_cg_data (&node, true);
if (is_tm_safe (node->symbol.decl))
ipa_tm_diagnose_tm_safe (node);
else if (d->all_tm_regions)
ipa_tm_diagnose_transaction (node, d->all_tm_regions);
}
/* Create clones. Do those that are not irrevocable and have a
positive call count. Do those publicly visible functions that
the user directed us to clone. */
for (i = 0; i < tm_callees.length (); ++i)
{
bool doit = false;
node = tm_callees[i];
if (node->symbol.cpp_implicit_alias)
continue;
a = cgraph_function_body_availability (node);
d = get_cg_data (&node, true);
if (a <= AVAIL_NOT_AVAILABLE)
doit = is_tm_callable (node->symbol.decl);
else if (a <= AVAIL_AVAILABLE && is_tm_callable (node->symbol.decl))
doit = true;
else if (!d->is_irrevocable
&& d->tm_callers_normal + d->tm_callers_clone > 0)
doit = true;
if (doit)
ipa_tm_create_version (node);
}
/* Redirect calls to the new clones, and insert irrevocable marks. */
for (i = 0; i < tm_callees.length (); ++i)
{
node = tm_callees[i];
if (node->symbol.analyzed)
{
d = get_cg_data (&node, true);
if (d->clone)
ipa_tm_transform_clone (node);
}
}
FOR_EACH_DEFINED_FUNCTION (node)
if (node->lowered
&& cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
{
d = get_cg_data (&node, true);
if (d->all_tm_regions)
ipa_tm_transform_transaction (node);
}
/* Free and clear all data structures. */
tm_callees.release ();
irr_worklist.release ();
bitmap_obstack_release (&tm_obstack);
free_original_copy_tables ();
FOR_EACH_FUNCTION (node)
node->symbol.aux = NULL;
#ifdef ENABLE_CHECKING
verify_cgraph ();
#endif
return 0;
}
struct simple_ipa_opt_pass pass_ipa_tm =
{
{
SIMPLE_IPA_PASS,
"tmipa", /* name */
OPTGROUP_NONE, /* optinfo_flags */
gate_tm, /* gate */
ipa_tm_execute, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TRANS_MEM, /* tv_id */
PROP_ssa | PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
},
};
#include "gt-trans-mem.h"