path: root/gcc/omp-low.c
author     razya <razya@138bc75d-0d04-0410-961f-82ee72b054a4>  2007-10-29 11:05:04 +0000
committer  razya <razya@138bc75d-0d04-0410-961f-82ee72b054a4>  2007-10-29 11:05:04 +0000
commit     cb7f680b625e277f171438aa11776154de58bed1 (patch)
tree       6b8a35dcec87c3829ed38ac34249604c36916f9c /gcc/omp-low.c
parent     2adb675d8eb150e3e210753cfb220b4602169c22 (diff)
2007-09-23  Razya Ladelsky
            Zdenek Dvorak

OMP_ATOMIC changes, reduction support for automatic parallelization.

* expr.c (expand_expr_real_1): Add cases for OMP_ATOMIC_LOAD,
OMP_ATOMIC_STORE.
* Makefile.in: Add dependencies to expr.o, tree-parloops.o, omp-low.o.
* tree-pretty-print.c (dump_generic_node): Add OMP_ATOMIC_LOAD and
OMP_ATOMIC_STORE.
* tree.h (OMP_DIRECTIVE_P): Add OMP_ATOMIC_LOAD, OMP_ATOMIC_STORE.
* gimple-low.c (lower_stmt): Same.
* gimplify.c (gimplify_expr): Same.
(gimplify_omp_atomic_fetch_op, gimplify_omp_atomic_pipeline,
gimplify_omp_atomic_mutex): Remove.
(gimplify_omp_atomic): Change it to simply gimplify the statement
instead of expanding it.
* omp-low.c: Add includes of optabs.h and cfgloop.h.
(expand_omp_atomic, expand_omp_atomic_pipeline, goa_stabilize_expr,
expand_omp_atomic_mutex, expand_omp_atomic_fetch_op): New functions to
implement expansion of OMP_ATOMIC.
(expand_omp, build_omp_regions_1): Add support for
OMP_ATOMIC_LOAD/OMP_ATOMIC_STORE.
* tree-cfg.c (make_edges): Add case for OMP_ATOMIC_LOAD,
OMP_ATOMIC_STORE.
* tree-gimple.c (is_gimple_stmt): Add OMP_ATOMIC_LOAD, OMP_ATOMIC_STORE.
* tree-parloops.c: Add include of tree-vectorizer.h.
(reduction_info): New structure for reduction.
(reduction_list): New list to represent list of reductions per loop.
(struct data_arg): New helper structure for reduction.
(reduction_info_hash, reduction_info_eq, reduction_phi,
initialize_reductions, create_call_for_reduction,
create_phi_for_local_result, create_call_for_reduction_1,
create_loads_for_reductions, create_final_loads_for_reduction): New
functions.
(loop_parallel_p): Identify reductions, add reduction_list parameter.
(separate_decls_in_loop_name): Support reduction variables.
(separate_decls_in_loop): Add reduction_list and ld_st_data arguments,
call create_loads_for_reduction for each reduction.
(canonicalize_loop_ivs): Identify reductions, add reduction_list
parameter.
(transform_to_exit_first_loop): Add reduction support, add
reduction_list parameter.
(gen_parallel_loop): Add reduction_list parameter.  Add call to
separate_decls_in_loop with the new argument.  Traverse reductions and
call initialize_reductions, create_call_for_reduction.
(parallelize_loops): Create and delete the reduction list.
(add_field_for_name): Change use of data parameter.  Add fields for
reductions.
* tree-vectorizer.h (vect_analyze_loop_form): Add declaration.
* tree-vect-analyze.c (vect_analyze_loop_form): Export it.
* tree.def: Add definitions for OMP_ATOMIC_LOAD, OMP_ATOMIC_STORE.
* tree-inline.c (estimate_num_insns_1): Add cases for OMP_ATOMIC_LOAD,
OMP_ATOMIC_STORE.
* tree-cfg.c (make_edges): Add OMP_ATOMIC_LOAD, OMP_ATOMIC_STORE.
* tree-ssa-operands.c (get_addr_dereference_operands): New function.
Subroutine of get_indirect_ref_operands.
(get_indirect_ref_operands): Call get_addr_dereference_operands.
(get_expr_operands): Support OMP_ATOMIC_LOAD, OMP_ATOMIC_STORE.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@129716 138bc75d-0d04-0410-961f-82ee72b054a4
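At the source level, the new expansion machinery targets OpenMP atomic updates such as the one in the sketch below; this is an editorial illustration, and the function and variable names are not taken from the patch or its testsuite.

/* Editorial sketch (not part of the patch): the kind of user code this
   change targets.  With -fopenmp, the atomic increment below becomes an
   OMP_ATOMIC_LOAD/OMP_ATOMIC_STORE pair in GIMPLE and is expanded by the
   new expand_omp_atomic () into a __sync builtin, a compare-and-swap
   loop, or a GOMP_atomic_start ()/GOMP_atomic_end () pair.  */

int counter;

void
bump (int n)
{
  int i;

#pragma omp parallel for
  for (i = 0; i < n; i++)
    {
#pragma omp atomic
      counter += 1;	/* candidate for __sync_fetch_and_add_4 */
    }
}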
Diffstat (limited to 'gcc/omp-low.c')
-rw-r--r--  gcc/omp-low.c | 373
1 file changed, 369 insertions(+), 4 deletions(-)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 421b5c62863..20f36e047b1 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -41,7 +41,8 @@ along with GCC; see the file COPYING3. If not see
#include "ggc.h"
#include "except.h"
#include "splay-tree.h"
-
+#include "optabs.h"
+#include "cfgloop.h"
/* Lowering of OpenMP parallel and workshare constructs proceeds in two
phases. The first phase scans the function looking for OMP statements
@@ -2597,7 +2598,7 @@ expand_omp_parallel (struct omp_region *region)
rebuild_cgraph_edges ();
pop_cfun ();
}
-
+
/* Emit a library call to launch the children threads. */
expand_parallel_call (region, new_bb, entry_stmt, ws_args);
update_ssa (TODO_update_ssa_only_virtuals);
@@ -3540,6 +3541,355 @@ expand_omp_synch (struct omp_region *region)
}
}
+/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
+ operation as a __sync_fetch_and_op builtin. INDEX is log2 of the
+ size of the data type, and thus usable to find the index of the builtin
+ decl. Returns false if the expression is not of the proper form. */
+
+static bool
+expand_omp_atomic_fetch_op (basic_block load_bb,
+ tree addr, tree loaded_val,
+ tree stored_val, int index)
+{
+ enum built_in_function base;
+ tree decl, itype, call;
+ enum insn_code *optab;
+ tree rhs;
+ basic_block store_bb = single_succ (load_bb);
+ block_stmt_iterator bsi;
+ tree stmt;
+
+ /* We expect to find the following sequences:
+
+ load_bb:
+ OMP_ATOMIC_LOAD (tmp, mem)
+
+ store_bb:
+ val = tmp OP something; (or: something OP tmp)
+ OMP_ATOMIC_STORE (val)
+
+ ???FIXME: Allow a more flexible sequence.
+ Perhaps use data flow to pick the statements.
+
+ */
+
+ bsi = bsi_after_labels (store_bb);
+ stmt = bsi_stmt (bsi);
+ if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
+ return false;
+ bsi_next (&bsi);
+ if (TREE_CODE (bsi_stmt (bsi)) != OMP_ATOMIC_STORE)
+ return false;
+
+ if (!operand_equal_p (GIMPLE_STMT_OPERAND (stmt, 0), stored_val, 0))
+ return false;
+
+ rhs = GIMPLE_STMT_OPERAND (stmt, 1);
+
+ /* Check for one of the supported fetch-op operations. */
+ switch (TREE_CODE (rhs))
+ {
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ base = BUILT_IN_FETCH_AND_ADD_N;
+ optab = sync_add_optab;
+ break;
+ case MINUS_EXPR:
+ base = BUILT_IN_FETCH_AND_SUB_N;
+ optab = sync_add_optab;
+ break;
+ case BIT_AND_EXPR:
+ base = BUILT_IN_FETCH_AND_AND_N;
+ optab = sync_and_optab;
+ break;
+ case BIT_IOR_EXPR:
+ base = BUILT_IN_FETCH_AND_OR_N;
+ optab = sync_ior_optab;
+ break;
+ case BIT_XOR_EXPR:
+ base = BUILT_IN_FETCH_AND_XOR_N;
+ optab = sync_xor_optab;
+ break;
+ default:
+ return false;
+ }
+ /* Make sure the expression is of the proper form. */
+ if (operand_equal_p (TREE_OPERAND (rhs, 0), loaded_val, 0))
+ rhs = TREE_OPERAND (rhs, 1);
+ else if (commutative_tree_code (TREE_CODE (rhs))
+ && operand_equal_p (TREE_OPERAND (rhs, 1), loaded_val, 0))
+ rhs = TREE_OPERAND (rhs, 0);
+ else
+ return false;
+
+ decl = built_in_decls[base + index + 1];
+ itype = TREE_TYPE (TREE_TYPE (decl));
+
+ if (optab[TYPE_MODE (itype)] == CODE_FOR_nothing)
+ return false;
+
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+ call = build_call_expr (decl, 2, addr, fold_convert (itype, rhs));
+ force_gimple_operand_bsi (&bsi, call, true, NULL_TREE, true, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+ bsi_remove (&bsi, true);
+ bsi = bsi_last (store_bb);
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+
+ return true;
+}
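For orientation, here is a sketch of what this fetch-op path amounts to at the source level, assuming a 4-byte int so the _4 builtin variant is selected; the function name is made up for illustration.

/* Editorial sketch, not part of the patch: the fetch-op expansion of
   "#pragma omp atomic  *addr += val;" for a 4-byte int.  The builtin
   call replaces the OMP_ATOMIC_LOAD; the modify statement and the
   OMP_ATOMIC_STORE in store_bb are then removed.  */

void
atomic_add (int *addr, int val)
{
  __sync_fetch_and_add (addr, val);
}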
+
+/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
+
+ oldval = *addr;
+ repeat:
+ newval = rhs; // with oldval replacing *addr in rhs
+ oldval = __sync_val_compare_and_swap (addr, oldval, newval);
+ if (oldval != newval)
+ goto repeat;
+
+ INDEX is log2 of the size of the data type, and thus usable to find the
+ index of the builtin decl. */
+
+static bool
+expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
+ tree addr, tree loaded_val, tree stored_val,
+ int index)
+{
+ tree loadedi, storedi, initial, new_stored, new_storedi, old_vali;
+ tree type, itype, cmpxchg, iaddr;
+ block_stmt_iterator bsi;
+ basic_block loop_header = single_succ (load_bb);
+ tree phi, x;
+ edge e;
+
+ cmpxchg = built_in_decls[BUILT_IN_VAL_COMPARE_AND_SWAP_N + index + 1];
+ type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
+ itype = TREE_TYPE (TREE_TYPE (cmpxchg));
+
+ if (sync_compare_and_swap[TYPE_MODE (itype)] == CODE_FOR_nothing)
+ return false;
+
+ /* Load the initial value, replacing the OMP_ATOMIC_LOAD. */
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+ initial = force_gimple_operand_bsi (&bsi, build_fold_indirect_ref (addr),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+ /* Move the value to the LOADED_VAL temporary. */
+ if (gimple_in_ssa_p (cfun))
+ {
+ gcc_assert (phi_nodes (loop_header) == NULL_TREE);
+ phi = create_phi_node (loaded_val, loop_header);
+ SSA_NAME_DEF_STMT (loaded_val) = phi;
+ SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
+ initial);
+ }
+ else
+ bsi_insert_before (&bsi,
+ build_gimple_modify_stmt (loaded_val, initial),
+ BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+
+ /* For floating-point values, we'll need to view-convert them to integers
+ so that we can perform the atomic compare and swap. Simplify the
+ following code by always setting up the "i"ntegral variables. */
+ if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
+ {
+ loadedi = loaded_val;
+ storedi = stored_val;
+ iaddr = addr;
+ }
+ else
+ {
+ loadedi = force_gimple_operand_bsi (&bsi,
+ build1 (VIEW_CONVERT_EXPR, itype,
+ loaded_val), true,
+ NULL_TREE, true, BSI_SAME_STMT);
+ storedi =
+ force_gimple_operand_bsi (&bsi,
+ build1 (VIEW_CONVERT_EXPR, itype,
+ stored_val), true, NULL_TREE, true,
+ BSI_SAME_STMT);
+ iaddr = fold_convert (build_pointer_type (itype), addr);
+ }
+
+ /* Build the compare&swap statement. */
+ new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
+ new_storedi = force_gimple_operand_bsi (&bsi,
+ fold_convert (itype, new_storedi),
+ true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ if (storedi == stored_val)
+ new_stored = new_storedi;
+ else
+ new_stored = force_gimple_operand_bsi (&bsi,
+ build1 (VIEW_CONVERT_EXPR, type,
+ new_storedi), true,
+ NULL_TREE, true, BSI_SAME_STMT);
+
+ if (gimple_in_ssa_p (cfun))
+ old_vali = loadedi;
+ else
+ {
+ old_vali = create_tmp_var (itype, NULL);
+ x = build_gimple_modify_stmt (old_vali, loadedi);
+ bsi_insert_before (&bsi, x, BSI_SAME_STMT);
+
+ x = build_gimple_modify_stmt (loaded_val, new_stored);
+ bsi_insert_before (&bsi, x, BSI_SAME_STMT);
+ }
+
+ /* Note that we always perform the comparison as an integer, even for
+ floating point. This allows the atomic operation to properly
+ succeed even with NaNs and -0.0. */
+ x = build3 (COND_EXPR, void_type_node,
+ build2 (NE_EXPR, boolean_type_node,
+ new_storedi, old_vali), NULL_TREE, NULL_TREE);
+ bsi_insert_before (&bsi, x, BSI_SAME_STMT);
+
+ /* Update cfg. */
+ e = single_succ_edge (store_bb);
+ e->flags &= ~EDGE_FALLTHRU;
+ e->flags |= EDGE_FALSE_VALUE;
+
+ e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
+
+ /* Copy the new value to loaded_val (we already did that before the condition
+ if we are not in SSA). */
+ if (gimple_in_ssa_p (cfun))
+ {
+ phi = phi_nodes (loop_header);
+ SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_stored);
+ }
+
+ /* Remove OMP_ATOMIC_STORE. */
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+
+ return true;
+}
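Written as plain C for a 4-byte integer, the loop built here looks roughly like the sketch below (editorial, with an invented function name). Note that the retry test compares the value returned by the compare-and-swap with the previously loaded value, matching the NE_EXPR built above.

/* Editorial sketch, not part of the patch: the compare-and-swap loop
   produced by expand_omp_atomic_pipeline, for an int.  Non-integral
   types are view-converted to a same-sized integer first, so the
   equality test is bitwise and is not confused by NaNs or -0.0.  */

void
atomic_update (int *addr, int term)
{
  int oldval = *addr;

  for (;;)
    {
      int newval = oldval + term;	/* rhs, with oldval standing in for *addr */
      int prev = __sync_val_compare_and_swap (addr, oldval, newval);
      if (prev == oldval)
	break;				/* the swap succeeded */
      oldval = prev;			/* lost a race; retry with the new contents */
    }
}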
+
+/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
+
+ GOMP_atomic_start ();
+ *addr = rhs;
+ GOMP_atomic_end ();
+
+ The result is not globally atomic, but works so long as all parallel
+ references are within #pragma omp atomic directives. According to
+ responses received from omp@openmp.org, this appears to be within spec,
+ which makes sense, since that's how several other compilers handle
+ this situation as well.
+ LOADED_VAL and ADDR are the operands of OMP_ATOMIC_LOAD we're expanding.
+ STORED_VAL is the operand of the matching OMP_ATOMIC_STORE.
+
+ We replace
+ OMP_ATOMIC_LOAD (loaded_val, addr) with
+ loaded_val = *addr;
+
+ and replace
+ OMP_ATOMIC_STORE (stored_val) with
+ *addr = stored_val;
+*/
+
+static bool
+expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
+ tree addr, tree loaded_val, tree stored_val)
+{
+ block_stmt_iterator bsi;
+ tree t;
+
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+
+ t = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
+ t = build_function_call_expr (t, 0);
+ force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
+
+ t = build_gimple_modify_stmt (loaded_val, build_fold_indirect_ref (addr));
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (loaded_val) = t;
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+
+ t = build_gimple_modify_stmt (build_fold_indirect_ref (unshare_expr (addr)),
+ stored_val);
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+
+ t = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
+ t = build_function_call_expr (t, 0);
+ force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+ return true;
+}
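The same fallback, written as plain C against the libgomp entry points the expansion calls; the prototypes are spelled out here for illustration, whereas the compiler takes them from built_in_decls[].

/* Editorial sketch, not part of the patch: the mutex fallback at the
   source level.  GOMP_atomic_start/GOMP_atomic_end serialize every
   such region in the program through libgomp.  */

extern void GOMP_atomic_start (void);
extern void GOMP_atomic_end (void);

void
atomic_update_mutex (long double *addr, long double rhs)
{
  GOMP_atomic_start ();
  *addr = rhs;
  GOMP_atomic_end ();
}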
+
+/* Expand an OMP_ATOMIC statement. We try to expand
+ using expand_omp_atomic_fetch_op. If that fails, we try to
+ call expand_omp_atomic_pipeline, and if that fails too, the
+ ultimate fallback is wrapping the operation in a mutex
+ (expand_omp_atomic_mutex). REGION is the atomic region built
+ by build_omp_regions_1(). */
+
+static void
+expand_omp_atomic (struct omp_region *region)
+{
+ basic_block load_bb = region->entry, store_bb = region->exit;
+ tree load = last_stmt (load_bb), store = last_stmt (store_bb);
+ tree loaded_val = TREE_OPERAND (load, 0);
+ tree addr = TREE_OPERAND (load, 1);
+ tree stored_val = TREE_OPERAND (store, 0);
+ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
+ HOST_WIDE_INT index;
+
+ /* Make sure the type is one of the supported sizes. */
+ index = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+ index = exact_log2 (index);
+ if (index >= 0 && index <= 4)
+ {
+ unsigned int align = TYPE_ALIGN_UNIT (type);
+
+ /* __sync builtins require strict data alignment. */
+ if (exact_log2 (align) >= index)
+ {
+ /* When possible, use specialized atomic update functions. */
+ if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
+ && store_bb == single_succ (load_bb))
+ {
+ if (expand_omp_atomic_fetch_op (load_bb, addr,
+ loaded_val, stored_val, index))
+ return;
+ }
+
+ /* If we don't have specialized __sync builtins, try and implement
+ as a compare and swap loop. */
+ if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
+ loaded_val, stored_val, index))
+ return;
+ }
+ }
+
+ /* The ultimate fallback is wrapping the operation in a mutex. */
+ expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
+}
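A worked example of the gate above: a 4-byte, 4-byte-aligned int gives index == exact_log2 (4) == 2 and exact_log2 (align) == 2 >= index, so the _4 __sync builtins are eligible, while a type whose size is not a power of two (or exceeds 16 bytes), or whose alignment is smaller than its size, falls through to the mutex path. The helper below is an editorial restatement of that check using the GCC-internal exact_log2; it is not part of the patch.

/* Editorial sketch, not part of the patch: the size/alignment gate of
   expand_omp_atomic factored into a helper.  Returns the builtin index
   (0..4, selecting the _1/_2/_4/_8/_16 variants) or -1 to request the
   GOMP_atomic fallback.  */

static int
atomic_builtin_index (unsigned HOST_WIDE_INT size, unsigned int align)
{
  int index = exact_log2 (size);

  if (index < 0 || index > 4)
    return -1;			/* size unsupported by the __sync builtins */
  if (exact_log2 (align) < index)
    return -1;			/* __sync builtins require strict alignment */
  return index;
}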
+
/* Expand the parallel region tree rooted at REGION. Expansion
proceeds in depth-first order. Innermost regions are expanded
@@ -3584,6 +3934,11 @@ expand_omp (struct omp_region *region)
expand_omp_synch (region);
break;
+ case OMP_ATOMIC_LOAD:
+ expand_omp_atomic (region);
+ break;
+
+
default:
gcc_unreachable ();
}
@@ -3614,7 +3969,6 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
stmt = bsi_stmt (si);
code = TREE_CODE (stmt);
-
if (code == OMP_RETURN)
{
/* STMT is the return point out of region PARENT. Mark it
@@ -3630,6 +3984,17 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
if (region->type == OMP_PARALLEL)
determine_parallel_type (region);
}
+ else if (code == OMP_ATOMIC_STORE)
+ {
+ /* OMP_ATOMIC_STORE is analogous to OMP_RETURN, but matches with
+ OMP_ATOMIC_LOAD. */
+ gcc_assert (parent);
+ gcc_assert (parent->type == OMP_ATOMIC_LOAD);
+ region = parent;
+ region->exit = bb;
+ parent = parent->outer;
+ }
+
else if (code == OMP_CONTINUE)
{
gcc_assert (parent);
@@ -3638,7 +4003,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
else if (code == OMP_SECTIONS_SWITCH)
{
/* OMP_SECTIONS_SWITCH is part of OMP_SECTIONS, and we do nothing for
- it. */
+ it. */ ;
}
else
{